/* kern_alq.c revision 206026 */
/*-
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * Copyright (c) 2008-2009, Lawrence Stewart <lstewart@freebsd.org>
 * Copyright (c) 2009-2010, The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed at the Centre for Advanced
 * Internet Architectures, Swinburne University of Technology, Melbourne,
 * Australia by Lawrence Stewart under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */ 32 33#include <sys/cdefs.h> 34__FBSDID("$FreeBSD: head/sys/kern/kern_alq.c 206026 2010-04-01 01:16:00Z lstewart $"); 35 36#include "opt_mac.h" 37 38#include <sys/param.h> 39#include <sys/systm.h> 40#include <sys/kernel.h> 41#include <sys/kthread.h> 42#include <sys/lock.h> 43#include <sys/mount.h> 44#include <sys/mutex.h> 45#include <sys/namei.h> 46#include <sys/proc.h> 47#include <sys/vnode.h> 48#include <sys/alq.h> 49#include <sys/malloc.h> 50#include <sys/unistd.h> 51#include <sys/fcntl.h> 52#include <sys/eventhandler.h> 53 54#include <security/mac/mac_framework.h> 55 56/* Async. Logging Queue */ 57struct alq { 58 int aq_entmax; /* Max entries */ 59 int aq_entlen; /* Entry length */ 60 char *aq_entbuf; /* Buffer for stored entries */ 61 int aq_flags; /* Queue flags */ 62 struct mtx aq_mtx; /* Queue lock */ 63 struct vnode *aq_vp; /* Open vnode handle */ 64 struct ucred *aq_cred; /* Credentials of the opening thread */ 65 struct ale *aq_first; /* First ent */ 66 struct ale *aq_entfree; /* First free ent */ 67 struct ale *aq_entvalid; /* First ent valid for writing */ 68 LIST_ENTRY(alq) aq_act; /* List of active queues */ 69 LIST_ENTRY(alq) aq_link; /* List of all queues */ 70}; 71 72#define AQ_WANTED 0x0001 /* Wakeup sleeper when io is done */ 73#define AQ_ACTIVE 0x0002 /* on the active list */ 74#define AQ_FLUSHING 0x0004 /* doing IO */ 75#define AQ_SHUTDOWN 0x0008 /* Queue no longer valid */ 76 77#define ALQ_LOCK(alq) mtx_lock_spin(&(alq)->aq_mtx) 78#define ALQ_UNLOCK(alq) mtx_unlock_spin(&(alq)->aq_mtx) 79 80static MALLOC_DEFINE(M_ALD, "ALD", "ALD"); 81 82/* 83 * The ald_mtx protects the ald_queues list and the ald_active list. 
84 */ 85static struct mtx ald_mtx; 86static LIST_HEAD(, alq) ald_queues; 87static LIST_HEAD(, alq) ald_active; 88static int ald_shutingdown = 0; 89struct thread *ald_thread; 90static struct proc *ald_proc; 91 92#define ALD_LOCK() mtx_lock(&ald_mtx) 93#define ALD_UNLOCK() mtx_unlock(&ald_mtx) 94 95/* Daemon functions */ 96static int ald_add(struct alq *); 97static int ald_rem(struct alq *); 98static void ald_startup(void *); 99static void ald_daemon(void); 100static void ald_shutdown(void *, int); 101static void ald_activate(struct alq *); 102static void ald_deactivate(struct alq *); 103 104/* Internal queue functions */ 105static void alq_shutdown(struct alq *); 106static void alq_destroy(struct alq *); 107static int alq_doio(struct alq *); 108 109 110/* 111 * Add a new queue to the global list. Fail if we're shutting down. 112 */ 113static int 114ald_add(struct alq *alq) 115{ 116 int error; 117 118 error = 0; 119 120 ALD_LOCK(); 121 if (ald_shutingdown) { 122 error = EBUSY; 123 goto done; 124 } 125 LIST_INSERT_HEAD(&ald_queues, alq, aq_link); 126done: 127 ALD_UNLOCK(); 128 return (error); 129} 130 131/* 132 * Remove a queue from the global list unless we're shutting down. If so, 133 * the ald will take care of cleaning up it's resources. 134 */ 135static int 136ald_rem(struct alq *alq) 137{ 138 int error; 139 140 error = 0; 141 142 ALD_LOCK(); 143 if (ald_shutingdown) { 144 error = EBUSY; 145 goto done; 146 } 147 LIST_REMOVE(alq, aq_link); 148done: 149 ALD_UNLOCK(); 150 return (error); 151} 152 153/* 154 * Put a queue on the active list. This will schedule it for writing. 
155 */ 156static void 157ald_activate(struct alq *alq) 158{ 159 LIST_INSERT_HEAD(&ald_active, alq, aq_act); 160 wakeup(&ald_active); 161} 162 163static void 164ald_deactivate(struct alq *alq) 165{ 166 LIST_REMOVE(alq, aq_act); 167 alq->aq_flags &= ~AQ_ACTIVE; 168} 169 170static void 171ald_startup(void *unused) 172{ 173 mtx_init(&ald_mtx, "ALDmtx", NULL, MTX_DEF|MTX_QUIET); 174 LIST_INIT(&ald_queues); 175 LIST_INIT(&ald_active); 176} 177 178static void 179ald_daemon(void) 180{ 181 int needwakeup; 182 struct alq *alq; 183 184 ald_thread = FIRST_THREAD_IN_PROC(ald_proc); 185 186 EVENTHANDLER_REGISTER(shutdown_pre_sync, ald_shutdown, NULL, 187 SHUTDOWN_PRI_FIRST); 188 189 ALD_LOCK(); 190 191 for (;;) { 192 while ((alq = LIST_FIRST(&ald_active)) == NULL && 193 !ald_shutingdown) 194 msleep(&ald_active, &ald_mtx, PWAIT, "aldslp", 0); 195 196 /* Don't shutdown until all active ALQs are flushed. */ 197 if (ald_shutingdown && alq == NULL) { 198 ALD_UNLOCK(); 199 break; 200 } 201 202 ALQ_LOCK(alq); 203 ald_deactivate(alq); 204 ALD_UNLOCK(); 205 needwakeup = alq_doio(alq); 206 ALQ_UNLOCK(alq); 207 if (needwakeup) 208 wakeup(alq); 209 ALD_LOCK(); 210 } 211 212 kproc_exit(0); 213} 214 215static void 216ald_shutdown(void *arg, int howto) 217{ 218 struct alq *alq; 219 220 ALD_LOCK(); 221 222 /* Ensure no new queues can be created. */ 223 ald_shutingdown = 1; 224 225 /* Shutdown all ALQs prior to terminating the ald_daemon. */ 226 while ((alq = LIST_FIRST(&ald_queues)) != NULL) { 227 LIST_REMOVE(alq, aq_link); 228 ALD_UNLOCK(); 229 alq_shutdown(alq); 230 ALD_LOCK(); 231 } 232 233 /* At this point, all ALQs are flushed and shutdown. */ 234 235 /* 236 * Wake ald_daemon so that it exits. It won't be able to do 237 * anything until we msleep because we hold the ald_mtx. 238 */ 239 wakeup(&ald_active); 240 241 /* Wait for ald_daemon to exit. 
*/ 242 msleep(ald_proc, &ald_mtx, PWAIT, "aldslp", 0); 243 244 ALD_UNLOCK(); 245} 246 247static void 248alq_shutdown(struct alq *alq) 249{ 250 ALQ_LOCK(alq); 251 252 /* Stop any new writers. */ 253 alq->aq_flags |= AQ_SHUTDOWN; 254 255 /* Drain IO */ 256 while (alq->aq_flags & (AQ_FLUSHING|AQ_ACTIVE)) { 257 alq->aq_flags |= AQ_WANTED; 258 msleep_spin(alq, &alq->aq_mtx, "aldclose", 0); 259 } 260 ALQ_UNLOCK(alq); 261 262 vn_close(alq->aq_vp, FWRITE, alq->aq_cred, 263 curthread); 264 crfree(alq->aq_cred); 265} 266 267void 268alq_destroy(struct alq *alq) 269{ 270 /* Drain all pending IO. */ 271 alq_shutdown(alq); 272 273 mtx_destroy(&alq->aq_mtx); 274 free(alq->aq_first, M_ALD); 275 free(alq->aq_entbuf, M_ALD); 276 free(alq, M_ALD); 277} 278 279/* 280 * Flush all pending data to disk. This operation will block. 281 */ 282static int 283alq_doio(struct alq *alq) 284{ 285 struct thread *td; 286 struct mount *mp; 287 struct vnode *vp; 288 struct uio auio; 289 struct iovec aiov[2]; 290 struct ale *ale; 291 struct ale *alstart; 292 int totlen; 293 int iov; 294 int vfslocked; 295 296 vp = alq->aq_vp; 297 td = curthread; 298 totlen = 0; 299 iov = 0; 300 301 alstart = ale = alq->aq_entvalid; 302 alq->aq_entvalid = NULL; 303 304 bzero(&aiov, sizeof(aiov)); 305 bzero(&auio, sizeof(auio)); 306 307 do { 308 if (aiov[iov].iov_base == NULL) 309 aiov[iov].iov_base = ale->ae_data; 310 aiov[iov].iov_len += alq->aq_entlen; 311 totlen += alq->aq_entlen; 312 /* Check to see if we're wrapping the buffer */ 313 if (ale->ae_data + alq->aq_entlen != ale->ae_next->ae_data) 314 iov++; 315 ale->ae_flags &= ~AE_VALID; 316 ale = ale->ae_next; 317 } while (ale->ae_flags & AE_VALID); 318 319 alq->aq_flags |= AQ_FLUSHING; 320 ALQ_UNLOCK(alq); 321 322 if (iov == 2 || aiov[iov].iov_base == NULL) 323 iov--; 324 325 auio.uio_iov = &aiov[0]; 326 auio.uio_offset = 0; 327 auio.uio_segflg = UIO_SYSSPACE; 328 auio.uio_rw = UIO_WRITE; 329 auio.uio_iovcnt = iov + 1; 330 auio.uio_resid = totlen; 331 auio.uio_td = 
td; 332 333 /* 334 * Do all of the junk required to write now. 335 */ 336 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 337 vn_start_write(vp, &mp, V_WAIT); 338 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 339 /* 340 * XXX: VOP_WRITE error checks are ignored. 341 */ 342#ifdef MAC 343 if (mac_vnode_check_write(alq->aq_cred, NOCRED, vp) == 0) 344#endif 345 VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, alq->aq_cred); 346 VOP_UNLOCK(vp, 0); 347 vn_finished_write(mp); 348 VFS_UNLOCK_GIANT(vfslocked); 349 350 ALQ_LOCK(alq); 351 alq->aq_flags &= ~AQ_FLUSHING; 352 353 if (alq->aq_entfree == NULL) 354 alq->aq_entfree = alstart; 355 356 if (alq->aq_flags & AQ_WANTED) { 357 alq->aq_flags &= ~AQ_WANTED; 358 return (1); 359 } 360 361 return(0); 362} 363 364static struct kproc_desc ald_kp = { 365 "ALQ Daemon", 366 ald_daemon, 367 &ald_proc 368}; 369 370SYSINIT(aldthread, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &ald_kp); 371SYSINIT(ald, SI_SUB_LOCK, SI_ORDER_ANY, ald_startup, NULL); 372 373 374/* User visible queue functions */ 375 376/* 377 * Create the queue data structure, allocate the buffer, and open the file. 
378 */ 379int 380alq_open(struct alq **alqp, const char *file, struct ucred *cred, int cmode, 381 int size, int count) 382{ 383 struct thread *td; 384 struct nameidata nd; 385 struct ale *ale; 386 struct ale *alp; 387 struct alq *alq; 388 char *bufp; 389 int flags; 390 int error; 391 int i, vfslocked; 392 393 *alqp = NULL; 394 td = curthread; 395 396 NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, file, td); 397 flags = FWRITE | O_NOFOLLOW | O_CREAT; 398 399 error = vn_open_cred(&nd, &flags, cmode, 0, cred, NULL); 400 if (error) 401 return (error); 402 403 vfslocked = NDHASGIANT(&nd); 404 NDFREE(&nd, NDF_ONLY_PNBUF); 405 /* We just unlock so we hold a reference */ 406 VOP_UNLOCK(nd.ni_vp, 0); 407 VFS_UNLOCK_GIANT(vfslocked); 408 409 alq = malloc(sizeof(*alq), M_ALD, M_WAITOK|M_ZERO); 410 alq->aq_entbuf = malloc(count * size, M_ALD, M_WAITOK|M_ZERO); 411 alq->aq_first = malloc(sizeof(*ale) * count, M_ALD, M_WAITOK|M_ZERO); 412 alq->aq_vp = nd.ni_vp; 413 alq->aq_cred = crhold(cred); 414 alq->aq_entmax = count; 415 alq->aq_entlen = size; 416 alq->aq_entfree = alq->aq_first; 417 418 mtx_init(&alq->aq_mtx, "ALD Queue", NULL, MTX_SPIN|MTX_QUIET); 419 420 bufp = alq->aq_entbuf; 421 ale = alq->aq_first; 422 alp = NULL; 423 424 /* Match up entries with buffers */ 425 for (i = 0; i < count; i++) { 426 if (alp) 427 alp->ae_next = ale; 428 ale->ae_data = bufp; 429 alp = ale; 430 ale++; 431 bufp += size; 432 } 433 434 alp->ae_next = alq->aq_first; 435 436 if ((error = ald_add(alq)) != 0) { 437 alq_destroy(alq); 438 return (error); 439 } 440 441 *alqp = alq; 442 443 return (0); 444} 445 446/* 447 * Copy a new entry into the queue. If the operation would block either 448 * wait or return an error depending on the value of waitok. 
449 */ 450int 451alq_write(struct alq *alq, void *data, int waitok) 452{ 453 struct ale *ale; 454 455 if ((ale = alq_get(alq, waitok)) == NULL) 456 return (EWOULDBLOCK); 457 458 bcopy(data, ale->ae_data, alq->aq_entlen); 459 alq_post(alq, ale); 460 461 return (0); 462} 463 464struct ale * 465alq_get(struct alq *alq, int waitok) 466{ 467 struct ale *ale; 468 struct ale *aln; 469 470 ale = NULL; 471 472 ALQ_LOCK(alq); 473 474 /* Loop until we get an entry or we're shutting down */ 475 while ((alq->aq_flags & AQ_SHUTDOWN) == 0 && 476 (ale = alq->aq_entfree) == NULL && 477 (waitok & ALQ_WAITOK)) { 478 alq->aq_flags |= AQ_WANTED; 479 msleep_spin(alq, &alq->aq_mtx, "alqget", 0); 480 } 481 482 if (ale != NULL) { 483 aln = ale->ae_next; 484 if ((aln->ae_flags & AE_VALID) == 0) 485 alq->aq_entfree = aln; 486 else 487 alq->aq_entfree = NULL; 488 } else 489 ALQ_UNLOCK(alq); 490 491 492 return (ale); 493} 494 495void 496alq_post(struct alq *alq, struct ale *ale) 497{ 498 int activate; 499 500 ale->ae_flags |= AE_VALID; 501 502 if (alq->aq_entvalid == NULL) 503 alq->aq_entvalid = ale; 504 505 if ((alq->aq_flags & AQ_ACTIVE) == 0) { 506 alq->aq_flags |= AQ_ACTIVE; 507 activate = 1; 508 } else 509 activate = 0; 510 511 ALQ_UNLOCK(alq); 512 if (activate) { 513 ALD_LOCK(); 514 ald_activate(alq); 515 ALD_UNLOCK(); 516 } 517} 518 519void 520alq_flush(struct alq *alq) 521{ 522 int needwakeup = 0; 523 524 ALD_LOCK(); 525 ALQ_LOCK(alq); 526 if (alq->aq_flags & AQ_ACTIVE) { 527 ald_deactivate(alq); 528 ALD_UNLOCK(); 529 needwakeup = alq_doio(alq); 530 } else 531 ALD_UNLOCK(); 532 ALQ_UNLOCK(alq); 533 534 if (needwakeup) 535 wakeup(alq); 536} 537 538/* 539 * Flush remaining data, close the file and free all resources. 540 */ 541void 542alq_close(struct alq *alq) 543{ 544 /* Only flush and destroy alq if not already shutting down. 
*/ 545 if (ald_rem(alq) == 0) 546 alq_destroy(alq); 547} 548 549static int 550alq_load_handler(module_t mod, int what, void *arg) 551{ 552 int ret; 553 554 ret = 0; 555 556 switch (what) { 557 case MOD_LOAD: 558 case MOD_SHUTDOWN: 559 break; 560 561 case MOD_QUIESCE: 562 ALD_LOCK(); 563 /* Only allow unload if there are no open queues. */ 564 if (LIST_FIRST(&ald_queues) == NULL) { 565 ald_shutingdown = 1; 566 ALD_UNLOCK(); 567 ald_shutdown(NULL, 0); 568 mtx_destroy(&ald_mtx); 569 } else { 570 ALD_UNLOCK(); 571 ret = EBUSY; 572 } 573 break; 574 575 case MOD_UNLOAD: 576 /* If MOD_QUIESCE failed we must fail here too. */ 577 if (ald_shutingdown == 0) 578 ret = EBUSY; 579 break; 580 581 default: 582 ret = EINVAL; 583 break; 584 } 585 586 return (ret); 587} 588 589static moduledata_t alq_mod = 590{ 591 "alq", 592 alq_load_handler, 593 NULL 594}; 595 596DECLARE_MODULE(alq, alq_mod, SI_SUB_SMP, SI_ORDER_ANY); 597MODULE_VERSION(alq, 1); 598