g_sched.c revision 302408
/*-
 * Copyright (c) 2009-2010 Fabio Checconi
 * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $Id$
 * $FreeBSD: stable/11/sys/geom/sched/g_sched.c 297793 2016-04-10 23:07:00Z pfg $
 *
 * Main control module for geom-based disk schedulers ('sched').
 *
 * USER VIEW
 * A 'sched' node is typically inserted transparently between
 * an existing provider pp and its original geom gp
 *
 *	[pp --> gp ..]
 *
 * using the command "geom sched insert <provider>" and
 * resulting in the following topology
 *
 *	[pp --> sched_gp --> cp]   [new_pp --> gp ... ]
 *
 * Deletion "geom sched destroy <provider>.sched." restores the
 * original chain. The normal "geom sched create <provider>"
 * is also supported.
 *
 * INTERNALS
 * Internally, the 'sched' uses the following data structures
 *
 *  geom{}         g_sched_softc{}      g_gsched{}
 * +----------+    +---------------+   +-------------+
 * | softc *-|--->| sc_gsched *-|-->| gs_init     |
 * | ...      |    |               |   | gs_fini     |
 * |          |    | [ hash table] |   | gs_start    |
 * +----------+    |               |   | ...         |
 *                 |               |   +-------------+
 *                 |               |
 *                 |               |     g_*_softc{}
 *                 |               |    +-------------+
 *                 | sc_data *-|-->|             |
 *                 +---------------+    | algorithm-  |
 *                                      | specific    |
 *                                      +-------------+
 *
 * A g_sched_softc{} is created with a "geom sched insert" call.
 * In turn this instantiates a specific scheduling algorithm,
 * which sets sc_gsched to point to the algorithm callbacks,
 * and calls gs_init() to create the g_*_softc{}.
 * The other callbacks (gs_start, gs_next, ...) are invoked
 * as needed.
 *
 * g_sched_softc{} is defined in g_sched.h and mostly used here;
 * g_gsched{}, and the gs_callbacks, are documented in gs_scheduler.h;
 * g_*_softc{} is defined/implemented by each algorithm (gs_*.c)
 *
 * DATA MOVING
 * When a bio is received on the provider, it goes to the
 * g_sched_start() which calls gs_start() to initially queue it;
 * then we call g_sched_dispatch() that loops around gs_next()
 * to select zero or more bio's to be sent downstream.
 *
 * g_sched_dispatch() can also be called as a result of a timeout,
 * e.g.
when doing anticipation or pacing requests. 86 * 87 * When a bio comes back, it goes to g_sched_done() which in turn 88 * calls gs_done(). The latter does any necessary housekeeping in 89 * the scheduling algorithm, and may decide to call g_sched_dispatch() 90 * to send more bio's downstream. 91 * 92 * If an algorithm needs per-flow queues, these are created 93 * calling gs_init_class() and destroyed with gs_fini_class(), 94 * and they are also inserted in the hash table implemented in 95 * the g_sched_softc{} 96 * 97 * If an algorithm is replaced, or a transparently-inserted node is 98 * removed with "geom sched destroy", we need to remove all references 99 * to the g_*_softc{} and g_sched_softc from the bio's still in 100 * the scheduler. g_sched_forced_dispatch() helps doing this. 101 * XXX need to explain better. 102 */ 103 104#include <sys/cdefs.h> 105#include <sys/param.h> 106#include <sys/systm.h> 107#include <sys/kernel.h> 108#include <sys/module.h> 109#include <sys/lock.h> 110#include <sys/mutex.h> 111#include <sys/bio.h> 112#include <sys/limits.h> 113#include <sys/hash.h> 114#include <sys/sbuf.h> 115#include <sys/sysctl.h> 116#include <sys/malloc.h> 117#include <sys/proc.h> /* we access curthread */ 118#include <geom/geom.h> 119#include "gs_scheduler.h" 120#include "g_sched.h" /* geom hooks */ 121 122/* 123 * Size of the per-geom hash table storing traffic classes. 124 * We may decide to change it at a later time, it has no ABI 125 * implications as it is only used for run-time allocations. 126 */ 127#define G_SCHED_HASH_SIZE 32 128 129static int g_sched_destroy(struct g_geom *gp, boolean_t force); 130static int g_sched_destroy_geom(struct gctl_req *req, 131 struct g_class *mp, struct g_geom *gp); 132static void g_sched_config(struct gctl_req *req, struct g_class *mp, 133 const char *verb); 134static struct g_geom *g_sched_taste(struct g_class *mp, 135 struct g_provider *pp, int flags __unused); 136static void g_sched_dumpconf(struct sbuf *sb, const char *indent, 137 struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp); 138static void g_sched_init(struct g_class *mp); 139static void g_sched_fini(struct g_class *mp); 140static int g_sched_ioctl(struct g_provider *pp, u_long cmd, void *data, 141 int fflag, struct thread *td); 142 143struct g_class g_sched_class = { 144 .name = G_SCHED_CLASS_NAME, 145 .version = G_VERSION, 146 .ctlreq = g_sched_config, 147 .taste = g_sched_taste, 148 .destroy_geom = g_sched_destroy_geom, 149 .init = g_sched_init, 150 .ioctl = g_sched_ioctl, 151 .fini = g_sched_fini 152}; 153 154MALLOC_DEFINE(M_GEOM_SCHED, "GEOM_SCHED", "Geom schedulers data structures"); 155 156/* 157 * Global variables describing the state of the geom_sched module. 158 * There is only one static instance of this structure. 159 */ 160LIST_HEAD(gs_list, g_gsched); /* type, link field */ 161struct geom_sched_vars { 162 struct mtx gs_mtx; 163 struct gs_list gs_scheds; /* list of algorithms */ 164 u_int gs_debug; 165 u_int gs_sched_count; /* how many algorithms ? */ 166 u_int gs_patched; /* g_io_request was patched */ 167 168 u_int gs_initialized; 169 u_int gs_expire_secs; /* expiration of hash entries */ 170 171 struct bio_queue_head gs_pending; 172 u_int gs_npending; 173 174 /* The following are for stats, usually protected by gs_mtx. 
*/ 175 u_long gs_requests; /* total requests */ 176 u_long gs_done; /* total done */ 177 u_int gs_in_flight; /* requests in flight */ 178 u_int gs_writes_in_flight; 179 u_int gs_bytes_in_flight; 180 u_int gs_write_bytes_in_flight; 181 182 char gs_names[256]; /* names of schedulers */ 183}; 184 185static struct geom_sched_vars me = { 186 .gs_expire_secs = 10, 187}; 188 189SYSCTL_DECL(_kern_geom); 190SYSCTL_NODE(_kern_geom, OID_AUTO, sched, CTLFLAG_RW, 0, 191 "GEOM_SCHED stuff"); 192 193SYSCTL_UINT(_kern_geom_sched, OID_AUTO, in_flight_wb, CTLFLAG_RD, 194 &me.gs_write_bytes_in_flight, 0, "Write bytes in flight"); 195 196SYSCTL_UINT(_kern_geom_sched, OID_AUTO, in_flight_b, CTLFLAG_RD, 197 &me.gs_bytes_in_flight, 0, "Bytes in flight"); 198 199SYSCTL_UINT(_kern_geom_sched, OID_AUTO, in_flight_w, CTLFLAG_RD, 200 &me.gs_writes_in_flight, 0, "Write Requests in flight"); 201 202SYSCTL_UINT(_kern_geom_sched, OID_AUTO, in_flight, CTLFLAG_RD, 203 &me.gs_in_flight, 0, "Requests in flight"); 204 205SYSCTL_ULONG(_kern_geom_sched, OID_AUTO, done, CTLFLAG_RD, 206 &me.gs_done, 0, "Total done"); 207 208SYSCTL_ULONG(_kern_geom_sched, OID_AUTO, requests, CTLFLAG_RD, 209 &me.gs_requests, 0, "Total requests"); 210 211SYSCTL_STRING(_kern_geom_sched, OID_AUTO, algorithms, CTLFLAG_RD, 212 &me.gs_names, 0, "Algorithm names"); 213 214SYSCTL_UINT(_kern_geom_sched, OID_AUTO, alg_count, CTLFLAG_RD, 215 &me.gs_sched_count, 0, "Number of algorithms"); 216 217SYSCTL_UINT(_kern_geom_sched, OID_AUTO, debug, CTLFLAG_RW, 218 &me.gs_debug, 0, "Debug level"); 219 220SYSCTL_UINT(_kern_geom_sched, OID_AUTO, expire_secs, CTLFLAG_RW, 221 &me.gs_expire_secs, 0, "Expire time in seconds"); 222 223/* 224 * g_sched calls the scheduler algorithms with this lock held. 225 * The locking functions are exposed so the scheduler algorithms can also 226 * protect themselves e.g. when running a callout handler. 227 */ 228void 229g_sched_lock(struct g_geom *gp) 230{ 231 struct g_sched_softc *sc = gp->softc; 232 233 mtx_lock(&sc->sc_mtx); 234} 235 236void 237g_sched_unlock(struct g_geom *gp) 238{ 239 struct g_sched_softc *sc = gp->softc; 240 241 mtx_unlock(&sc->sc_mtx); 242} 243 244/* 245 * Support functions to handle references to the module, 246 * which are coming from devices using this scheduler. 247 */ 248static inline void 249g_gsched_ref(struct g_gsched *gsp) 250{ 251 252 atomic_add_int(&gsp->gs_refs, 1); 253} 254 255static inline void 256g_gsched_unref(struct g_gsched *gsp) 257{ 258 259 atomic_add_int(&gsp->gs_refs, -1); 260} 261 262/* 263 * Update the stats when this request is done. 264 */ 265static void 266g_sched_update_stats(struct bio *bio) 267{ 268 269 me.gs_done++; 270 me.gs_in_flight--; 271 me.gs_bytes_in_flight -= bio->bio_length; 272 if (bio->bio_cmd == BIO_WRITE) { 273 me.gs_writes_in_flight--; 274 me.gs_write_bytes_in_flight -= bio->bio_length; 275 } 276} 277 278/* 279 * Dispatch any pending request. 280 */ 281static void 282g_sched_forced_dispatch(struct g_geom *gp) 283{ 284 struct g_sched_softc *sc = gp->softc; 285 struct g_gsched *gsp = sc->sc_gsched; 286 struct bio *bp; 287 288 KASSERT(mtx_owned(&sc->sc_mtx), 289 ("sc_mtx not owned during forced dispatch")); 290 291 while ((bp = gsp->gs_next(sc->sc_data, 1)) != NULL) 292 g_io_request(bp, LIST_FIRST(&gp->consumer)); 293} 294 295/* 296 * The main dispatch loop, called either here after the start 297 * routine, or by scheduling algorithms when they receive a timeout 298 * or a 'done' notification. 
Does not share code with the forced 299 * dispatch path, since the gs_done() callback can call us. 300 */ 301void 302g_sched_dispatch(struct g_geom *gp) 303{ 304 struct g_sched_softc *sc = gp->softc; 305 struct g_gsched *gsp = sc->sc_gsched; 306 struct bio *bp; 307 308 KASSERT(mtx_owned(&sc->sc_mtx), ("sc_mtx not owned during dispatch")); 309 310 if ((sc->sc_flags & G_SCHED_FLUSHING)) 311 return; 312 313 while ((bp = gsp->gs_next(sc->sc_data, 0)) != NULL) 314 g_io_request(bp, LIST_FIRST(&gp->consumer)); 315} 316 317/* 318 * Recent (8.0 and above) versions of FreeBSD have support to 319 * register classifiers of disk requests. The classifier is 320 * invoked by g_io_request(), and stores the information into 321 * bp->bio_classifier1. 322 * 323 * Support for older versions, which is left here only for 324 * documentation purposes, relies on two hacks: 325 * 1. classification info is written into the bio_caller1 326 * field of the topmost node in the bio chain. This field 327 * is rarely used, but this module is incompatible with 328 * those that use bio_caller1 for other purposes, 329 * such as ZFS and gjournal; 330 * 2. g_io_request() is patched in-memory when the module is 331 * loaded, so that the function calls a classifier as its 332 * first thing. g_io_request() is restored when the module 333 * is unloaded. This functionality is only supported for 334 * x86 and amd64, other architectures need source code changes. 335 */ 336 337/* 338 * Lookup the identity of the issuer of the original request. 339 * In the current implementation we use the curthread of the 340 * issuer, but different mechanisms may be implemented later 341 * so we do not make assumptions on the return value which for 342 * us is just an opaque identifier. 343 */ 344 345static inline u_long 346g_sched_classify(struct bio *bp) 347{ 348 349 /* we have classifier fields in the struct bio */ 350 return ((u_long)bp->bio_classifier1); 351} 352 353/* Return the hash chain for the given key. */ 354static inline struct g_hash * 355g_sched_hash(struct g_sched_softc *sc, u_long key) 356{ 357 358 return (&sc->sc_hash[key & sc->sc_mask]); 359} 360 361/* 362 * Helper function for the children classes, which takes 363 * a geom and a bio and returns the private descriptor 364 * associated to the request. This involves fetching 365 * the classification field and [al]locating the 366 * corresponding entry in the hash table. 367 */ 368void * 369g_sched_get_class(struct g_geom *gp, struct bio *bp) 370{ 371 struct g_sched_softc *sc; 372 struct g_sched_class *gsc; 373 struct g_gsched *gsp; 374 struct g_hash *bucket; 375 u_long key; 376 377 sc = gp->softc; 378 key = g_sched_classify(bp); 379 bucket = g_sched_hash(sc, key); 380 LIST_FOREACH(gsc, bucket, gsc_clist) { 381 if (key == gsc->gsc_key) { 382 gsc->gsc_refs++; 383 return (gsc->gsc_priv); 384 } 385 } 386 387 gsp = sc->sc_gsched; 388 gsc = malloc(sizeof(*gsc) + gsp->gs_priv_size, 389 M_GEOM_SCHED, M_NOWAIT | M_ZERO); 390 if (!gsc) 391 return (NULL); 392 393 if (gsp->gs_init_class(sc->sc_data, gsc->gsc_priv)) { 394 free(gsc, M_GEOM_SCHED); 395 return (NULL); 396 } 397 398 gsc->gsc_refs = 2; /* 1 for the hash table, 1 for the caller. 
 */
	gsc->gsc_key = key;
	LIST_INSERT_HEAD(bucket, gsc, gsc_clist);

	gsc->gsc_expire = ticks + me.gs_expire_secs * hz;

	return (gsc->gsc_priv);
}

/*
 * Release a reference to the per-client descriptor.
 */
void
g_sched_put_class(struct g_geom *gp, void *priv)
{
	struct g_sched_class *gsc;
	struct g_sched_softc *sc;

	gsc = g_sched_priv2class(priv);
	gsc->gsc_expire = ticks + me.gs_expire_secs * hz;

	if (--gsc->gsc_refs > 0)
		return;

	sc = gp->softc;
	sc->sc_gsched->gs_fini_class(sc->sc_data, priv);

	LIST_REMOVE(gsc, gsc_clist);
	free(gsc, M_GEOM_SCHED);
}

static void
g_sched_hash_fini(struct g_geom *gp, struct g_hash *hp, u_long mask,
    struct g_gsched *gsp, void *data)
{
	struct g_sched_class *cp, *cp2;
	int i;

	if (!hp)
		return;

	if (data && gsp->gs_hash_unref)
		gsp->gs_hash_unref(data);

	for (i = 0; i < G_SCHED_HASH_SIZE; i++) {
		LIST_FOREACH_SAFE(cp, &hp[i], gsc_clist, cp2)
			g_sched_put_class(gp, cp->gsc_priv);
	}

	hashdestroy(hp, M_GEOM_SCHED, mask);
}

static struct g_hash *
g_sched_hash_init(struct g_gsched *gsp, u_long *mask, int flags)
{
	struct g_hash *hash;

	if (gsp->gs_priv_size == 0)
		return (NULL);

	hash = hashinit_flags(G_SCHED_HASH_SIZE, M_GEOM_SCHED, mask, flags);

	return (hash);
}

static void
g_sched_flush_classes(struct g_geom *gp)
{
	struct g_sched_softc *sc;
	struct g_sched_class *cp, *cp2;
	int i;

	sc = gp->softc;

	if (!sc->sc_hash || ticks - sc->sc_flush_ticks <= 0)
		return;

	for (i = 0; i < G_SCHED_HASH_SIZE; i++) {
		LIST_FOREACH_SAFE(cp, &sc->sc_hash[i], gsc_clist, cp2) {
			if (cp->gsc_refs == 1 && ticks - cp->gsc_expire > 0)
				g_sched_put_class(gp, cp->gsc_priv);
		}
	}

	sc->sc_flush_ticks = ticks + me.gs_expire_secs * hz;
}

/*
 * Wait for the completion of any outstanding request. To ensure
 * that this does not take forever the caller has to make sure that
 * no new requests enter the scheduler before calling us.
 *
 * Must be called with the gp mutex held and topology locked.
 */
static int
g_sched_wait_pending(struct g_geom *gp)
{
	struct g_sched_softc *sc = gp->softc;
	int endticks = ticks + hz;

	g_topology_assert();

	while (sc->sc_pending && endticks - ticks >= 0)
		msleep(gp, &sc->sc_mtx, 0, "sched_wait_pending", hz / 4);

	return (sc->sc_pending ? ETIMEDOUT : 0);
}

static int
g_sched_remove_locked(struct g_geom *gp, struct g_gsched *gsp)
{
	struct g_sched_softc *sc = gp->softc;
	int error;

	/* Set the flushing flag: new bios will not enter the scheduler. */
	sc->sc_flags |= G_SCHED_FLUSHING;

	g_sched_forced_dispatch(gp);
	error = g_sched_wait_pending(gp);
	if (error)
		goto failed;

	/* No more requests pending or in flight from the old gsp. */

	g_sched_hash_fini(gp, sc->sc_hash, sc->sc_mask, gsp, sc->sc_data);
	sc->sc_hash = NULL;

	/*
	 * Avoid deadlock here by releasing the gp mutex and reacquiring
	 * it once done. It should be safe, since no reconfiguration or
	 * destruction can take place due to the geom topology lock; no
	 * new request can use the current sc_data since we flagged the
	 * geom as being flushed.
531 */ 532 g_sched_unlock(gp); 533 gsp->gs_fini(sc->sc_data); 534 g_sched_lock(gp); 535 536 sc->sc_gsched = NULL; 537 sc->sc_data = NULL; 538 g_gsched_unref(gsp); 539 540failed: 541 sc->sc_flags &= ~G_SCHED_FLUSHING; 542 543 return (error); 544} 545 546static int 547g_sched_remove(struct g_geom *gp, struct g_gsched *gsp) 548{ 549 int error; 550 551 g_sched_lock(gp); 552 error = g_sched_remove_locked(gp, gsp); /* gsp is surely non-null */ 553 g_sched_unlock(gp); 554 555 return (error); 556} 557 558/* 559 * Support function for create/taste -- locate the desired 560 * algorithm and grab a reference to it. 561 */ 562static struct g_gsched * 563g_gsched_find(const char *name) 564{ 565 struct g_gsched *gsp = NULL; 566 567 mtx_lock(&me.gs_mtx); 568 LIST_FOREACH(gsp, &me.gs_scheds, glist) { 569 if (strcmp(name, gsp->gs_name) == 0) { 570 g_gsched_ref(gsp); 571 break; 572 } 573 } 574 mtx_unlock(&me.gs_mtx); 575 576 return (gsp); 577} 578 579/* 580 * Rebuild the list of scheduler names. 581 * To be called with me.gs_mtx lock held. 582 */ 583static void 584g_gsched_build_names(struct g_gsched *gsp) 585{ 586 int pos, l; 587 struct g_gsched *cur; 588 589 pos = 0; 590 LIST_FOREACH(cur, &me.gs_scheds, glist) { 591 l = strlen(cur->gs_name); 592 if (l + pos + 1 + 1 < sizeof(me.gs_names)) { 593 if (pos != 0) 594 me.gs_names[pos++] = ' '; 595 strcpy(me.gs_names + pos, cur->gs_name); 596 pos += l; 597 } 598 } 599 me.gs_names[pos] = '\0'; 600} 601 602/* 603 * Register or unregister individual scheduling algorithms. 604 */ 605static int 606g_gsched_register(struct g_gsched *gsp) 607{ 608 struct g_gsched *cur; 609 int error = 0; 610 611 mtx_lock(&me.gs_mtx); 612 LIST_FOREACH(cur, &me.gs_scheds, glist) { 613 if (strcmp(gsp->gs_name, cur->gs_name) == 0) 614 break; 615 } 616 if (cur != NULL) { 617 G_SCHED_DEBUG(0, "A scheduler named %s already" 618 "exists.", gsp->gs_name); 619 error = EEXIST; 620 } else { 621 LIST_INSERT_HEAD(&me.gs_scheds, gsp, glist); 622 gsp->gs_refs = 1; 623 me.gs_sched_count++; 624 g_gsched_build_names(gsp); 625 } 626 mtx_unlock(&me.gs_mtx); 627 628 return (error); 629} 630 631struct g_gsched_unregparm { 632 struct g_gsched *gup_gsp; 633 int gup_error; 634}; 635 636static void 637g_gsched_unregister(void *arg, int flag) 638{ 639 struct g_gsched_unregparm *parm = arg; 640 struct g_gsched *gsp = parm->gup_gsp, *cur, *tmp; 641 struct g_sched_softc *sc; 642 struct g_geom *gp, *gp_tmp; 643 int error; 644 645 parm->gup_error = 0; 646 647 g_topology_assert(); 648 649 if (flag == EV_CANCEL) 650 return; 651 652 mtx_lock(&me.gs_mtx); 653 654 LIST_FOREACH_SAFE(gp, &g_sched_class.geom, geom, gp_tmp) { 655 if (gp->class != &g_sched_class) 656 continue; /* Should not happen. 
*/ 657 658 sc = gp->softc; 659 if (sc->sc_gsched == gsp) { 660 error = g_sched_remove(gp, gsp); 661 if (error) 662 goto failed; 663 } 664 } 665 666 LIST_FOREACH_SAFE(cur, &me.gs_scheds, glist, tmp) { 667 if (cur != gsp) 668 continue; 669 670 if (gsp->gs_refs != 1) { 671 G_SCHED_DEBUG(0, "%s still in use.", 672 gsp->gs_name); 673 parm->gup_error = EBUSY; 674 } else { 675 LIST_REMOVE(gsp, glist); 676 me.gs_sched_count--; 677 g_gsched_build_names(gsp); 678 } 679 break; 680 } 681 682 if (cur == NULL) { 683 G_SCHED_DEBUG(0, "%s not registered.", gsp->gs_name); 684 parm->gup_error = ENOENT; 685 } 686 687failed: 688 mtx_unlock(&me.gs_mtx); 689} 690 691static inline void 692g_gsched_global_init(void) 693{ 694 695 if (!me.gs_initialized) { 696 G_SCHED_DEBUG(0, "Initializing global data."); 697 mtx_init(&me.gs_mtx, "gsched", NULL, MTX_DEF); 698 LIST_INIT(&me.gs_scheds); 699 bioq_init(&me.gs_pending); 700 me.gs_initialized = 1; 701 } 702} 703 704/* 705 * Module event called when a scheduling algorithm module is loaded or 706 * unloaded. 707 */ 708int 709g_gsched_modevent(module_t mod, int cmd, void *arg) 710{ 711 struct g_gsched *gsp = arg; 712 struct g_gsched_unregparm parm; 713 int error; 714 715 G_SCHED_DEBUG(0, "Modevent %d.", cmd); 716 717 /* 718 * If the module is loaded at boot, the geom thread that calls 719 * g_sched_init() might actually run after g_gsched_modevent(), 720 * so make sure that the module is properly initialized. 721 */ 722 g_gsched_global_init(); 723 724 error = EOPNOTSUPP; 725 switch (cmd) { 726 case MOD_LOAD: 727 error = g_gsched_register(gsp); 728 G_SCHED_DEBUG(0, "Loaded module %s error %d.", 729 gsp->gs_name, error); 730 if (error == 0) 731 g_retaste(&g_sched_class); 732 break; 733 734 case MOD_UNLOAD: 735 parm.gup_gsp = gsp; 736 parm.gup_error = 0; 737 738 error = g_waitfor_event(g_gsched_unregister, 739 &parm, M_WAITOK, NULL); 740 if (error == 0) 741 error = parm.gup_error; 742 G_SCHED_DEBUG(0, "Unloaded module %s error %d.", 743 gsp->gs_name, error); 744 break; 745 } 746 747 return (error); 748} 749 750#ifdef KTR 751#define TRC_BIO_EVENT(e, bp) g_sched_trace_bio_ ## e (bp) 752 753static inline char 754g_sched_type(struct bio *bp) 755{ 756 757 if (bp->bio_cmd == BIO_READ) 758 return ('R'); 759 else if (bp->bio_cmd == BIO_WRITE) 760 return ('W'); 761 return ('U'); 762} 763 764static inline void 765g_sched_trace_bio_START(struct bio *bp) 766{ 767 768 CTR5(KTR_GSCHED, "S %lu %c %lu/%lu %lu", g_sched_classify(bp), 769 g_sched_type(bp), bp->bio_offset / ULONG_MAX, 770 bp->bio_offset, bp->bio_length); 771} 772 773static inline void 774g_sched_trace_bio_DONE(struct bio *bp) 775{ 776 777 CTR5(KTR_GSCHED, "D %lu %c %lu/%lu %lu", g_sched_classify(bp), 778 g_sched_type(bp), bp->bio_offset / ULONG_MAX, 779 bp->bio_offset, bp->bio_length); 780} 781#else /* !KTR */ 782#define TRC_BIO_EVENT(e, bp) 783#endif /* !KTR */ 784 785/* 786 * g_sched_done() and g_sched_start() dispatch the geom requests to 787 * the scheduling algorithm in use. 
788 */ 789static void 790g_sched_done(struct bio *bio) 791{ 792 struct g_geom *gp = bio->bio_caller2; 793 struct g_sched_softc *sc = gp->softc; 794 795 TRC_BIO_EVENT(DONE, bio); 796 797 KASSERT(bio->bio_caller1, ("null bio_caller1 in g_sched_done")); 798 799 g_sched_lock(gp); 800 801 g_sched_update_stats(bio); 802 sc->sc_gsched->gs_done(sc->sc_data, bio); 803 if (!--sc->sc_pending) 804 wakeup(gp); 805 806 g_sched_flush_classes(gp); 807 g_sched_unlock(gp); 808 809 g_std_done(bio); 810} 811 812static void 813g_sched_start(struct bio *bp) 814{ 815 struct g_geom *gp = bp->bio_to->geom; 816 struct g_sched_softc *sc = gp->softc; 817 struct bio *cbp; 818 819 TRC_BIO_EVENT(START, bp); 820 G_SCHED_LOGREQ(bp, "Request received."); 821 822 cbp = g_clone_bio(bp); 823 if (cbp == NULL) { 824 g_io_deliver(bp, ENOMEM); 825 return; 826 } 827 cbp->bio_done = g_sched_done; 828 cbp->bio_to = LIST_FIRST(&gp->provider); 829 KASSERT(cbp->bio_to != NULL, ("NULL provider")); 830 831 /* We only schedule reads and writes. */ 832 if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE) 833 goto bypass; 834 835 G_SCHED_LOGREQ(cbp, "Sending request."); 836 837 g_sched_lock(gp); 838 /* 839 * Call the algorithm's gs_start to queue the request in the 840 * scheduler. If gs_start fails then pass the request down, 841 * otherwise call g_sched_dispatch() which tries to push 842 * one or more requests down. 843 */ 844 if (!sc->sc_gsched || (sc->sc_flags & G_SCHED_FLUSHING) || 845 sc->sc_gsched->gs_start(sc->sc_data, cbp)) { 846 g_sched_unlock(gp); 847 goto bypass; 848 } 849 /* 850 * We use bio_caller1 to mark requests that are scheduled 851 * so make sure it is not NULL. 852 */ 853 if (cbp->bio_caller1 == NULL) 854 cbp->bio_caller1 = &me; /* anything not NULL */ 855 856 cbp->bio_caller2 = gp; 857 sc->sc_pending++; 858 859 /* Update general stats. */ 860 me.gs_in_flight++; 861 me.gs_requests++; 862 me.gs_bytes_in_flight += bp->bio_length; 863 if (bp->bio_cmd == BIO_WRITE) { 864 me.gs_writes_in_flight++; 865 me.gs_write_bytes_in_flight += bp->bio_length; 866 } 867 g_sched_dispatch(gp); 868 g_sched_unlock(gp); 869 return; 870 871bypass: 872 cbp->bio_done = g_std_done; 873 cbp->bio_caller1 = NULL; /* not scheduled */ 874 g_io_request(cbp, LIST_FIRST(&gp->consumer)); 875} 876 877/* 878 * The next few functions are the geom glue. 
879 */ 880static void 881g_sched_orphan(struct g_consumer *cp) 882{ 883 884 g_topology_assert(); 885 g_sched_destroy(cp->geom, 1); 886} 887 888static int 889g_sched_access(struct g_provider *pp, int dr, int dw, int de) 890{ 891 struct g_geom *gp; 892 struct g_consumer *cp; 893 int error; 894 895 gp = pp->geom; 896 cp = LIST_FIRST(&gp->consumer); 897 error = g_access(cp, dr, dw, de); 898 899 return (error); 900} 901 902static void 903g_sched_temporary_start(struct bio *bio) 904{ 905 906 mtx_lock(&me.gs_mtx); 907 me.gs_npending++; 908 bioq_disksort(&me.gs_pending, bio); 909 mtx_unlock(&me.gs_mtx); 910} 911 912static void 913g_sched_flush_pending(g_start_t *start) 914{ 915 struct bio *bp; 916 917 while ((bp = bioq_takefirst(&me.gs_pending))) 918 start(bp); 919} 920 921static int 922g_insert_proxy(struct g_geom *gp, struct g_provider *newpp, 923 struct g_geom *dstgp, struct g_provider *pp, struct g_consumer *cp) 924{ 925 struct g_sched_softc *sc = gp->softc; 926 g_start_t *saved_start, *flush = g_sched_start; 927 int error = 0, endticks = ticks + hz; 928 929 g_cancel_event(newpp); /* prevent taste() */ 930 /* copy private fields */ 931 newpp->private = pp->private; 932 newpp->index = pp->index; 933 934 /* Queue all the early requests coming for us. */ 935 me.gs_npending = 0; 936 saved_start = pp->geom->start; 937 dstgp->start = g_sched_temporary_start; 938 939 while (pp->nstart - pp->nend != me.gs_npending && 940 endticks - ticks >= 0) 941 tsleep(pp, PRIBIO, "-", hz/10); 942 943 if (pp->nstart - pp->nend != me.gs_npending) { 944 flush = saved_start; 945 error = ETIMEDOUT; 946 goto fail; 947 } 948 949 /* link pp to this geom */ 950 LIST_REMOVE(pp, provider); 951 pp->geom = gp; 952 LIST_INSERT_HEAD(&gp->provider, pp, provider); 953 954 /* 955 * replicate the counts from the parent in the 956 * new provider and consumer nodes 957 */ 958 cp->acr = newpp->acr = pp->acr; 959 cp->acw = newpp->acw = pp->acw; 960 cp->ace = newpp->ace = pp->ace; 961 sc->sc_flags |= G_SCHED_PROXYING; 962 963fail: 964 dstgp->start = saved_start; 965 966 g_sched_flush_pending(flush); 967 968 return (error); 969} 970 971/* 972 * Create a geom node for the device passed as *pp. 973 * If successful, add a reference to this gsp. 974 */ 975static int 976g_sched_create(struct gctl_req *req, struct g_class *mp, 977 struct g_provider *pp, struct g_gsched *gsp, int proxy) 978{ 979 struct g_sched_softc *sc = NULL; 980 struct g_geom *gp, *dstgp; 981 struct g_provider *newpp = NULL; 982 struct g_consumer *cp = NULL; 983 char name[64]; 984 int error; 985 986 g_topology_assert(); 987 988 snprintf(name, sizeof(name), "%s%s", pp->name, G_SCHED_SUFFIX); 989 LIST_FOREACH(gp, &mp->geom, geom) { 990 if (strcmp(gp->name, name) == 0) { 991 gctl_error(req, "Geom %s already exists.", 992 name); 993 return (EEXIST); 994 } 995 } 996 997 gp = g_new_geomf(mp, "%s", name); 998 dstgp = proxy ? pp->geom : gp; /* where do we link the provider */ 999 1000 sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); 1001 sc->sc_gsched = gsp; 1002 sc->sc_data = gsp->gs_init(gp); 1003 if (sc->sc_data == NULL) { 1004 error = ENOMEM; 1005 goto fail; 1006 } 1007 1008 sc->sc_hash = g_sched_hash_init(gsp, &sc->sc_mask, HASH_WAITOK); 1009 1010 /* 1011 * Do not initialize the flush mechanism, will be initialized 1012 * on the first insertion on the hash table. 
1013 */ 1014 1015 mtx_init(&sc->sc_mtx, "g_sched_mtx", NULL, MTX_DEF); 1016 1017 gp->softc = sc; 1018 gp->start = g_sched_start; 1019 gp->orphan = g_sched_orphan; 1020 gp->access = g_sched_access; 1021 gp->dumpconf = g_sched_dumpconf; 1022 1023 newpp = g_new_providerf(dstgp, "%s", gp->name); 1024 newpp->mediasize = pp->mediasize; 1025 newpp->sectorsize = pp->sectorsize; 1026 1027 cp = g_new_consumer(gp); 1028 error = g_attach(cp, proxy ? newpp : pp); 1029 if (error != 0) { 1030 gctl_error(req, "Cannot attach to provider %s.", 1031 pp->name); 1032 goto fail; 1033 } 1034 1035 g_error_provider(newpp, 0); 1036 if (proxy) { 1037 error = g_insert_proxy(gp, newpp, dstgp, pp, cp); 1038 if (error) 1039 goto fail; 1040 } 1041 G_SCHED_DEBUG(0, "Device %s created.", gp->name); 1042 1043 g_gsched_ref(gsp); 1044 1045 return (0); 1046 1047fail: 1048 if (cp != NULL) { 1049 if (cp->provider != NULL) 1050 g_detach(cp); 1051 g_destroy_consumer(cp); 1052 } 1053 if (newpp != NULL) 1054 g_destroy_provider(newpp); 1055 if (sc->sc_hash) 1056 g_sched_hash_fini(gp, sc->sc_hash, sc->sc_mask, 1057 gsp, sc->sc_data); 1058 if (sc->sc_data) 1059 gsp->gs_fini(sc->sc_data); 1060 g_free(gp->softc); 1061 g_destroy_geom(gp); 1062 1063 return (error); 1064} 1065 1066/* 1067 * Support for dynamic switching of scheduling algorithms. 1068 * First initialize the data structures for the new algorithm, 1069 * then call g_sched_remove_locked() to flush all references 1070 * to the old one, finally link the new algorithm. 1071 */ 1072static int 1073g_sched_change_algo(struct gctl_req *req, struct g_class *mp, 1074 struct g_provider *pp, struct g_gsched *gsp) 1075{ 1076 struct g_sched_softc *sc; 1077 struct g_geom *gp; 1078 struct g_hash *newh; 1079 void *data; 1080 u_long mask; 1081 int error = 0; 1082 1083 gp = pp->geom; 1084 sc = gp->softc; 1085 1086 data = gsp->gs_init(gp); 1087 if (data == NULL) 1088 return (ENOMEM); 1089 1090 newh = g_sched_hash_init(gsp, &mask, HASH_WAITOK); 1091 if (gsp->gs_priv_size && !newh) { 1092 error = ENOMEM; 1093 goto fail; 1094 } 1095 1096 g_sched_lock(gp); 1097 if (sc->sc_gsched) { /* can be NULL in some cases */ 1098 error = g_sched_remove_locked(gp, sc->sc_gsched); 1099 if (error) 1100 goto fail; 1101 } 1102 1103 g_gsched_ref(gsp); 1104 sc->sc_gsched = gsp; 1105 sc->sc_data = data; 1106 sc->sc_hash = newh; 1107 sc->sc_mask = mask; 1108 1109 g_sched_unlock(gp); 1110 1111 return (0); 1112 1113fail: 1114 if (newh) 1115 g_sched_hash_fini(gp, newh, mask, gsp, data); 1116 1117 if (data) 1118 gsp->gs_fini(data); 1119 1120 g_sched_unlock(gp); 1121 1122 return (error); 1123} 1124 1125/* 1126 * Stop the request flow directed to the proxy, redirecting the new 1127 * requests to the me.gs_pending queue. 
1128 */ 1129static struct g_provider * 1130g_detach_proxy(struct g_geom *gp) 1131{ 1132 struct g_consumer *cp; 1133 struct g_provider *pp, *newpp; 1134 1135 do { 1136 pp = LIST_FIRST(&gp->provider); 1137 if (pp == NULL) 1138 break; 1139 cp = LIST_FIRST(&gp->consumer); 1140 if (cp == NULL) 1141 break; 1142 newpp = cp->provider; 1143 if (newpp == NULL) 1144 break; 1145 1146 me.gs_npending = 0; 1147 pp->geom->start = g_sched_temporary_start; 1148 1149 return (pp); 1150 } while (0); 1151 printf("%s error detaching proxy %s\n", __FUNCTION__, gp->name); 1152 1153 return (NULL); 1154} 1155 1156static void 1157g_sched_blackhole(struct bio *bp) 1158{ 1159 1160 g_io_deliver(bp, ENXIO); 1161} 1162 1163static inline void 1164g_reparent_provider(struct g_provider *pp, struct g_geom *gp, 1165 struct g_provider *newpp) 1166{ 1167 1168 LIST_REMOVE(pp, provider); 1169 if (newpp) { 1170 pp->private = newpp->private; 1171 pp->index = newpp->index; 1172 } 1173 pp->geom = gp; 1174 LIST_INSERT_HEAD(&gp->provider, pp, provider); 1175} 1176 1177static inline void 1178g_unproxy_provider(struct g_provider *oldpp, struct g_provider *newpp) 1179{ 1180 struct g_geom *gp = oldpp->geom; 1181 1182 g_reparent_provider(oldpp, newpp->geom, newpp); 1183 1184 /* 1185 * Hackish: let the system destroy the old provider for us, just 1186 * in case someone attached a consumer to it, in which case a 1187 * direct call to g_destroy_provider() would not work. 1188 */ 1189 g_reparent_provider(newpp, gp, NULL); 1190} 1191 1192/* 1193 * Complete the proxy destruction, linking the old provider to its 1194 * original geom, and destroying the proxy provider. Also take care 1195 * of issuing the pending requests collected in me.gs_pending (if any). 1196 */ 1197static int 1198g_destroy_proxy(struct g_geom *gp, struct g_provider *oldpp) 1199{ 1200 struct g_consumer *cp; 1201 struct g_provider *newpp; 1202 1203 do { 1204 cp = LIST_FIRST(&gp->consumer); 1205 if (cp == NULL) 1206 break; 1207 newpp = cp->provider; 1208 if (newpp == NULL) 1209 break; 1210 1211 /* Relink the provider to its original geom. */ 1212 g_unproxy_provider(oldpp, newpp); 1213 1214 /* Detach consumer from provider, and destroy provider. */ 1215 cp->acr = newpp->acr = 0; 1216 cp->acw = newpp->acw = 0; 1217 cp->ace = newpp->ace = 0; 1218 g_detach(cp); 1219 1220 /* Send the pending bios through the right start function. */ 1221 g_sched_flush_pending(oldpp->geom->start); 1222 1223 return (0); 1224 } while (0); 1225 printf("%s error destroying proxy %s\n", __FUNCTION__, gp->name); 1226 1227 /* We cannot send the pending bios anywhere... */ 1228 g_sched_flush_pending(g_sched_blackhole); 1229 1230 return (EINVAL); 1231} 1232 1233static int 1234g_sched_destroy(struct g_geom *gp, boolean_t force) 1235{ 1236 struct g_provider *pp, *oldpp = NULL; 1237 struct g_sched_softc *sc; 1238 struct g_gsched *gsp; 1239 int error; 1240 1241 g_topology_assert(); 1242 sc = gp->softc; 1243 if (sc == NULL) 1244 return (ENXIO); 1245 if (!(sc->sc_flags & G_SCHED_PROXYING)) { 1246 pp = LIST_FIRST(&gp->provider); 1247 if (pp && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { 1248 const char *msg = force ? 
1249 "but we force removal" : "cannot remove"; 1250 1251 G_SCHED_DEBUG(!force, 1252 "Device %s is still open (r%dw%de%d), %s.", 1253 pp->name, pp->acr, pp->acw, pp->ace, msg); 1254 if (!force) 1255 return (EBUSY); 1256 } else { 1257 G_SCHED_DEBUG(0, "Device %s removed.", gp->name); 1258 } 1259 } else 1260 oldpp = g_detach_proxy(gp); 1261 1262 gsp = sc->sc_gsched; 1263 if (gsp) { 1264 /* 1265 * XXX bad hack here: force a dispatch to release 1266 * any reference to the hash table still held by 1267 * the scheduler. 1268 */ 1269 g_sched_lock(gp); 1270 /* 1271 * We are dying here, no new requests should enter 1272 * the scheduler. This is granted by the topolgy, 1273 * either in case we were proxying (new bios are 1274 * being redirected) or not (see the access check 1275 * above). 1276 */ 1277 g_sched_forced_dispatch(gp); 1278 error = g_sched_wait_pending(gp); 1279 1280 if (error) { 1281 /* 1282 * Not all the requests came home: this might happen 1283 * under heavy load, or if we were waiting for any 1284 * bio which is served in the event path (see 1285 * geom_slice.c for an example of how this can 1286 * happen). Try to restore a working configuration 1287 * if we can fail. 1288 */ 1289 if ((sc->sc_flags & G_SCHED_PROXYING) && oldpp) { 1290 g_sched_flush_pending(force ? 1291 g_sched_blackhole : g_sched_start); 1292 } 1293 1294 /* 1295 * In the forced destroy case there is not so much 1296 * we can do, we have pending bios that will call 1297 * g_sched_done() somehow, and we don't want them 1298 * to crash the system using freed memory. We tell 1299 * the user that something went wrong, and leak some 1300 * memory here. 1301 * Note: the callers using force = 1 ignore the 1302 * return value. 1303 */ 1304 if (force) { 1305 G_SCHED_DEBUG(0, "Pending requests while " 1306 " destroying geom, some memory leaked."); 1307 } 1308 1309 return (error); 1310 } 1311 1312 g_sched_unlock(gp); 1313 g_sched_hash_fini(gp, sc->sc_hash, sc->sc_mask, 1314 gsp, sc->sc_data); 1315 sc->sc_hash = NULL; 1316 gsp->gs_fini(sc->sc_data); 1317 g_gsched_unref(gsp); 1318 sc->sc_gsched = NULL; 1319 } else 1320 error = 0; 1321 1322 if ((sc->sc_flags & G_SCHED_PROXYING) && oldpp) { 1323 error = g_destroy_proxy(gp, oldpp); 1324 1325 if (error) { 1326 if (force) { 1327 G_SCHED_DEBUG(0, "Unrecoverable error while " 1328 "destroying a proxy geom, leaking some " 1329 " memory."); 1330 } 1331 1332 return (error); 1333 } 1334 } 1335 1336 mtx_destroy(&sc->sc_mtx); 1337 1338 g_free(gp->softc); 1339 gp->softc = NULL; 1340 g_wither_geom(gp, ENXIO); 1341 1342 return (error); 1343} 1344 1345static int 1346g_sched_destroy_geom(struct gctl_req *req, struct g_class *mp, 1347 struct g_geom *gp) 1348{ 1349 1350 return (g_sched_destroy(gp, 0)); 1351} 1352 1353/* 1354 * Functions related to the classification of requests. 1355 * 1356 * On recent FreeBSD versions (8.0 and above), we store a reference 1357 * to the issuer of a request in bp->bio_classifier1 as soon 1358 * as the bio is posted to the geom queue (and not later, because 1359 * requests are managed by the g_down thread afterwards). 1360 */ 1361 1362/* 1363 * Classifier support for recent FreeBSD versions: we use 1364 * a very simple classifier, only use curthread to tag a request. 1365 * The classifier is registered at module load, and unregistered 1366 * at module unload. 
1367 */ 1368static int 1369g_sched_tag(void *arg, struct bio *bp) 1370{ 1371 1372 bp->bio_classifier1 = curthread; 1373 return (1); 1374} 1375 1376static struct g_classifier_hook g_sched_classifier = { 1377 .func = g_sched_tag, 1378}; 1379 1380static inline void 1381g_classifier_ini(void) 1382{ 1383 1384 g_register_classifier(&g_sched_classifier); 1385} 1386 1387static inline void 1388g_classifier_fini(void) 1389{ 1390 1391 g_unregister_classifier(&g_sched_classifier); 1392} 1393 1394static void 1395g_sched_init(struct g_class *mp) 1396{ 1397 1398 g_gsched_global_init(); 1399 1400 G_SCHED_DEBUG(0, "Loading: mp = %p, g_sched_class = %p.", 1401 mp, &g_sched_class); 1402 1403 /* Patch g_io_request to store classification info in the bio. */ 1404 g_classifier_ini(); 1405} 1406 1407static void 1408g_sched_fini(struct g_class *mp) 1409{ 1410 1411 g_classifier_fini(); 1412 1413 G_SCHED_DEBUG(0, "Unloading..."); 1414 1415 KASSERT(LIST_EMPTY(&me.gs_scheds), ("still registered schedulers")); 1416 mtx_destroy(&me.gs_mtx); 1417} 1418 1419static int 1420g_sched_ioctl(struct g_provider *pp, u_long cmd, void *data, int fflag, 1421 struct thread *td) 1422{ 1423 struct g_consumer *cp; 1424 struct g_geom *gp; 1425 1426 cp = LIST_FIRST(&pp->geom->consumer); 1427 if (cp == NULL) 1428 return (ENOIOCTL); 1429 gp = cp->provider->geom; 1430 if (gp->ioctl == NULL) 1431 return (ENOIOCTL); 1432 return (gp->ioctl(cp->provider, cmd, data, fflag, td)); 1433} 1434 1435/* 1436 * Read the i-th argument for a request, skipping the /dev/ 1437 * prefix if present. 1438 */ 1439static const char * 1440g_sched_argi(struct gctl_req *req, int i) 1441{ 1442 static const char *dev_prefix = "/dev/"; 1443 const char *name; 1444 char param[16]; 1445 int l = strlen(dev_prefix); 1446 1447 snprintf(param, sizeof(param), "arg%d", i); 1448 name = gctl_get_asciiparam(req, param); 1449 if (name == NULL) 1450 gctl_error(req, "No 'arg%d' argument", i); 1451 else if (strncmp(name, dev_prefix, l) == 0) 1452 name += l; 1453 return (name); 1454} 1455 1456/* 1457 * Fetch nargs and do appropriate checks. 1458 */ 1459static int 1460g_sched_get_nargs(struct gctl_req *req) 1461{ 1462 int *nargs; 1463 1464 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 1465 if (nargs == NULL) { 1466 gctl_error(req, "No 'nargs' argument"); 1467 return (0); 1468 } 1469 if (*nargs <= 0) 1470 gctl_error(req, "Missing device(s)."); 1471 return (*nargs); 1472} 1473 1474/* 1475 * Check whether we should add the class on certain volumes when 1476 * this geom is created. Right now this is under control of a kenv 1477 * variable containing the names of all devices that we care about. 1478 * Probably we should only support transparent insertion as the 1479 * preferred mode of operation. 1480 */ 1481static struct g_geom * 1482g_sched_taste(struct g_class *mp, struct g_provider *pp, 1483 int flags __unused) 1484{ 1485 struct g_gsched *gsp = NULL; /* the . 
algorithm we want */ 1486 const char *s; /* generic string pointer */ 1487 const char *taste_names; /* devices we like */ 1488 int l; 1489 1490 g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, 1491 mp->name, pp->name); 1492 g_topology_assert(); 1493 1494 G_SCHED_DEBUG(2, "Tasting %s.", pp->name); 1495 1496 do { 1497 /* do not taste on ourselves */ 1498 if (pp->geom->class == mp) 1499 break; 1500 1501 taste_names = kern_getenv("geom.sched.taste"); 1502 if (taste_names == NULL) 1503 break; 1504 1505 l = strlen(pp->name); 1506 for (s = taste_names; *s && 1507 (s = strstr(s, pp->name)); s++) { 1508 /* further checks for an exact match */ 1509 if ( (s == taste_names || s[-1] == ' ') && 1510 (s[l] == '\0' || s[l] == ' ') ) 1511 break; 1512 } 1513 if (s == NULL) 1514 break; 1515 G_SCHED_DEBUG(0, "Attach device %s match [%s]\n", 1516 pp->name, s); 1517 1518 /* look up the provider name in the list */ 1519 s = kern_getenv("geom.sched.algo"); 1520 if (s == NULL) 1521 s = "rr"; 1522 1523 gsp = g_gsched_find(s); /* also get a reference */ 1524 if (gsp == NULL) { 1525 G_SCHED_DEBUG(0, "Bad '%s' algorithm.", s); 1526 break; 1527 } 1528 1529 /* XXX create with 1 as last argument ? */ 1530 g_sched_create(NULL, mp, pp, gsp, 0); 1531 g_gsched_unref(gsp); 1532 } while (0); 1533 return NULL; 1534} 1535 1536static void 1537g_sched_ctl_create(struct gctl_req *req, struct g_class *mp, int proxy) 1538{ 1539 struct g_provider *pp; 1540 struct g_gsched *gsp; 1541 const char *name; 1542 int i, nargs; 1543 1544 g_topology_assert(); 1545 1546 name = gctl_get_asciiparam(req, "algo"); 1547 if (name == NULL) { 1548 gctl_error(req, "No '%s' argument", "algo"); 1549 return; 1550 } 1551 1552 gsp = g_gsched_find(name); /* also get a reference */ 1553 if (gsp == NULL) { 1554 gctl_error(req, "Bad algorithm '%s'", name); 1555 return; 1556 } 1557 1558 nargs = g_sched_get_nargs(req); 1559 1560 /* 1561 * Run on the arguments, and break on any error. 1562 * We look for a device name, but skip the /dev/ prefix if any. 1563 */ 1564 for (i = 0; i < nargs; i++) { 1565 name = g_sched_argi(req, i); 1566 if (name == NULL) 1567 break; 1568 pp = g_provider_by_name(name); 1569 if (pp == NULL) { 1570 G_SCHED_DEBUG(1, "Provider %s is invalid.", name); 1571 gctl_error(req, "Provider %s is invalid.", name); 1572 break; 1573 } 1574 if (g_sched_create(req, mp, pp, gsp, proxy) != 0) 1575 break; 1576 } 1577 1578 g_gsched_unref(gsp); 1579} 1580 1581static void 1582g_sched_ctl_configure(struct gctl_req *req, struct g_class *mp) 1583{ 1584 struct g_provider *pp; 1585 struct g_gsched *gsp; 1586 const char *name; 1587 int i, nargs; 1588 1589 g_topology_assert(); 1590 1591 name = gctl_get_asciiparam(req, "algo"); 1592 if (name == NULL) { 1593 gctl_error(req, "No '%s' argument", "algo"); 1594 return; 1595 } 1596 1597 gsp = g_gsched_find(name); /* also get a reference */ 1598 if (gsp == NULL) { 1599 gctl_error(req, "Bad algorithm '%s'", name); 1600 return; 1601 } 1602 1603 nargs = g_sched_get_nargs(req); 1604 1605 /* 1606 * Run on the arguments, and break on any error. 1607 * We look for a device name, but skip the /dev/ prefix if any. 
1608 */ 1609 for (i = 0; i < nargs; i++) { 1610 name = g_sched_argi(req, i); 1611 if (name == NULL) 1612 break; 1613 pp = g_provider_by_name(name); 1614 if (pp == NULL || pp->geom->class != mp) { 1615 G_SCHED_DEBUG(1, "Provider %s is invalid.", name); 1616 gctl_error(req, "Provider %s is invalid.", name); 1617 break; 1618 } 1619 if (g_sched_change_algo(req, mp, pp, gsp) != 0) 1620 break; 1621 } 1622 1623 g_gsched_unref(gsp); 1624} 1625 1626static struct g_geom * 1627g_sched_find_geom(struct g_class *mp, const char *name) 1628{ 1629 struct g_geom *gp; 1630 1631 LIST_FOREACH(gp, &mp->geom, geom) { 1632 if (strcmp(gp->name, name) == 0) 1633 return (gp); 1634 } 1635 return (NULL); 1636} 1637 1638static void 1639g_sched_ctl_destroy(struct gctl_req *req, struct g_class *mp) 1640{ 1641 int nargs, *force, error, i; 1642 struct g_geom *gp; 1643 const char *name; 1644 1645 g_topology_assert(); 1646 1647 nargs = g_sched_get_nargs(req); 1648 1649 force = gctl_get_paraml(req, "force", sizeof(*force)); 1650 if (force == NULL) { 1651 gctl_error(req, "No 'force' argument"); 1652 return; 1653 } 1654 1655 for (i = 0; i < nargs; i++) { 1656 name = g_sched_argi(req, i); 1657 if (name == NULL) 1658 break; 1659 1660 gp = g_sched_find_geom(mp, name); 1661 if (gp == NULL) { 1662 G_SCHED_DEBUG(1, "Device %s is invalid.", name); 1663 gctl_error(req, "Device %s is invalid.", name); 1664 break; 1665 } 1666 1667 error = g_sched_destroy(gp, *force); 1668 if (error != 0) { 1669 gctl_error(req, "Cannot destroy device %s (error=%d).", 1670 gp->name, error); 1671 break; 1672 } 1673 } 1674} 1675 1676static void 1677g_sched_config(struct gctl_req *req, struct g_class *mp, const char *verb) 1678{ 1679 uint32_t *version; 1680 1681 g_topology_assert(); 1682 1683 version = gctl_get_paraml(req, "version", sizeof(*version)); 1684 if (version == NULL) { 1685 gctl_error(req, "No '%s' argument.", "version"); 1686 return; 1687 } 1688 1689 if (*version != G_SCHED_VERSION) { 1690 gctl_error(req, "Userland and kernel parts are " 1691 "out of sync."); 1692 return; 1693 } 1694 1695 if (strcmp(verb, "create") == 0) { 1696 g_sched_ctl_create(req, mp, 0); 1697 return; 1698 } else if (strcmp(verb, "insert") == 0) { 1699 g_sched_ctl_create(req, mp, 1); 1700 return; 1701 } else if (strcmp(verb, "configure") == 0) { 1702 g_sched_ctl_configure(req, mp); 1703 return; 1704 } else if (strcmp(verb, "destroy") == 0) { 1705 g_sched_ctl_destroy(req, mp); 1706 return; 1707 } 1708 1709 gctl_error(req, "Unknown verb."); 1710} 1711 1712static void 1713g_sched_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, 1714 struct g_consumer *cp, struct g_provider *pp) 1715{ 1716 struct g_sched_softc *sc = gp->softc; 1717 struct g_gsched *gsp = sc->sc_gsched; 1718 if (indent == NULL) { /* plaintext */ 1719 sbuf_printf(sb, " algo %s", gsp ? gsp->gs_name : "--"); 1720 } 1721 if (gsp != NULL && gsp->gs_dumpconf) 1722 gsp->gs_dumpconf(sb, indent, gp, cp, pp); 1723} 1724 1725DECLARE_GEOM_CLASS(g_sched_class, g_sched); 1726MODULE_VERSION(geom_sched, 0); 1727
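
/*
 * Usage sketch for the verbs accepted by g_sched_config() above. The
 * command lines are illustrative (check gsched(8) for the exact option
 * spelling); the kenv names match the kern_getenv() calls in
 * g_sched_taste():
 *
 *	geom sched insert da0		# transparent insertion, creates da0.sched.
 *	geom sched configure -a rr da0	# switch the algorithm at run time
 *	geom sched destroy da0.sched.	# restore the original chain
 *
 * To attach automatically at boot, from loader.conf(5):
 *
 *	geom.sched.taste="da0 ada1"	# providers to pick up while tasting
 *	geom.sched.algo="rr"		# default algorithm for tasted devices
 */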
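
/*
 * A minimal scheduling-algorithm skeleton, assuming the callback names
 * and g_gsched{} fields referenced in this file (gs_name, gs_priv_size,
 * gs_init, gs_fini, gs_start, gs_done, gs_next); the exact typedefs live
 * in gs_scheduler.h. This illustrative "fifo" module simply queues bios
 * in arrival order, so g_sched_dispatch() drains it completely on every
 * call. It is a sketch, not part of the geom_sched sources.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bio.h>
#include <sys/malloc.h>
#include <geom/geom.h>
#include "gs_scheduler.h"

static MALLOC_DEFINE(M_GSCHED_FIFO, "gsched_fifo", "example fifo scheduler");

/* Per-device state, returned by gs_init() and stored in sc_data. */
struct g_fifo_softc {
	struct bio_queue_head	sc_queue;	/* bios not yet dispatched */
	struct g_geom		*sc_geom;	/* owning 'sched' geom */
};

static void *
g_fifo_init(struct g_geom *gp)
{
	struct g_fifo_softc *sc;

	sc = malloc(sizeof(*sc), M_GSCHED_FIFO, M_WAITOK | M_ZERO);
	bioq_init(&sc->sc_queue);
	sc->sc_geom = gp;
	return (sc);
}

static void
g_fifo_fini(void *data)
{

	free(data, M_GSCHED_FIFO);
}

/* Called by g_sched_start() with the scheduler lock held; 0 = queued. */
static int
g_fifo_start(void *data, struct bio *bp)
{
	struct g_fifo_softc *sc = data;

	bioq_insert_tail(&sc->sc_queue, bp);
	return (0);
}

/* Called in a loop by g_sched_dispatch()/g_sched_forced_dispatch(). */
static struct bio *
g_fifo_next(void *data, int force)
{
	struct g_fifo_softc *sc = data;

	return (bioq_takefirst(&sc->sc_queue));
}

/* Called by g_sched_done() when a dispatched bio completes. */
static void
g_fifo_done(void *data, struct bio *bp)
{
}

static struct g_gsched g_fifo_gsched = {
	.gs_name = "fifo",
	.gs_priv_size = 0,	/* no per-flow state, no hash table needed */
	.gs_init = g_fifo_init,
	.gs_fini = g_fifo_fini,
	.gs_start = g_fifo_start,
	.gs_done = g_fifo_done,
	.gs_next = g_fifo_next,
};

/* Registration funnels through g_gsched_modevent(), defined above. */
static moduledata_t g_fifo_mod = {
	"gsched_fifo", g_gsched_modevent, &g_fifo_gsched
};
DECLARE_MODULE(gsched_fifo, g_fifo_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
MODULE_DEPEND(gsched_fifo, geom_sched, 0, 0, 0);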
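
/*
 * A sketch of per-flow classification, assuming an algorithm that sets
 * gs_priv_size to the size of its per-issuer queue. g_sched_get_class()
 * (defined above) looks up bio_classifier1 in the per-geom hash table and
 * hands back a zeroed private area, created through gs_init_class() on
 * first use; g_sched_put_class() drops the reference so idle entries can
 * expire. Only the class-handling callbacks are shown; gs_next() would
 * pick bios out of the per-flow queues. The g_flow_* names are
 * hypothetical.
 */
struct g_flow_softc {
	struct g_geom	*sc_geom;	/* saved by gs_init(), as above */
};

struct g_flow_queue {			/* gs_priv_size = sizeof(this) */
	struct bio_queue_head	fq_bios;	/* bios owned by this issuer */
};

static int
g_flow_init_class(void *data, void *priv)
{
	struct g_flow_queue *fq = priv;

	bioq_init(&fq->fq_bios);
	return (0);			/* non-zero aborts the lookup */
}

static void
g_flow_fini_class(void *data, void *priv)
{

	/* The queue must be empty by the time the class goes away. */
}

static int
g_flow_start(void *data, struct bio *bp)
{
	struct g_flow_softc *sc = data;
	struct g_flow_queue *fq;

	fq = g_sched_get_class(sc->sc_geom, bp);
	if (fq == NULL)
		return (-1);		/* g_sched_start() will bypass us */

	bp->bio_caller1 = fq;		/* remember the class for gs_done() */
	bioq_insert_tail(&fq->fq_bios, bp);
	return (0);
}

static void
g_flow_done(void *data, struct bio *bp)
{
	struct g_flow_softc *sc = data;

	g_sched_put_class(sc->sc_geom, bp->bio_caller1);
}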
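
/*
 * A sketch of timeout-driven dispatch (anticipation or pacing), assuming
 * a per-device softc that keeps a callout and a backpointer to the
 * 'sched' geom. g_sched_lock()/g_sched_unlock() are the exported
 * wrappers around sc_mtx, so the handler can safely re-enter
 * g_sched_dispatch() from callout context. The g_wait_* names and the
 * hz/2 interval are made up for the example.
 */
#include <sys/callout.h>

struct g_wait_softc {
	struct g_geom	*sc_geom;	/* set by gs_init() */
	struct callout	 sc_timer;	/* callout_init(&sc_timer, 1) in gs_init() */
};

/* Callout handler: runs without the scheduler lock, so take it here. */
static void
g_wait_timeout(void *arg)
{
	struct g_wait_softc *sc = arg;

	g_sched_lock(sc->sc_geom);
	g_sched_dispatch(sc->sc_geom);	/* push whatever gs_next() now allows */
	g_sched_unlock(sc->sc_geom);
}

/* Arm the timer, e.g. from gs_start() while anticipating a close request. */
static void
g_wait_arm(struct g_wait_softc *sc)
{

	callout_reset(&sc->sc_timer, hz / 2, g_wait_timeout, sc);
}

/* gs_fini() must callout_drain(&sc->sc_timer) before freeing the softc. */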