1/*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2009-2010 Fabio Checconi 5 * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30/* 31 * $Id$ 32 * $FreeBSD$ 33 * 34 * Main control module for geom-based disk schedulers ('sched'). 35 * 36 * USER VIEW 37 * A 'sched' node is typically inserted transparently between 38 * an existing provider pp and its original geom gp 39 * 40 * [pp --> gp ..] 41 * 42 * using the command "geom sched insert <provider>" and 43 * resulting in the following topology 44 * 45 * [pp --> sched_gp --> cp] [new_pp --> gp ... ] 46 * 47 * Deletion "geom sched destroy <provider>.sched." restores the 48 * original chain. The normal "geom sched create <provide>" 49 * is also supported. 50 * 51 * INTERNALS 52 * Internally, the 'sched' uses the following data structures 53 * 54 * geom{} g_sched_softc{} g_gsched{} 55 * +----------+ +---------------+ +-------------+ 56 * | softc *-|--->| sc_gsched *-|-->| gs_init | 57 * | ... | | | | gs_fini | 58 * | | | [ hash table] | | gs_start | 59 * +----------+ | | | ... | 60 * | | +-------------+ 61 * | | 62 * | | g_*_softc{} 63 * | | +-------------+ 64 * | sc_data *-|-->| | 65 * +---------------+ | algorithm- | 66 * | specific | 67 * +-------------+ 68 * 69 * A g_sched_softc{} is created with a "geom sched insert" call. 70 * In turn this instantiates a specific scheduling algorithm, 71 * which sets sc_gsched to point to the algorithm callbacks, 72 * and calls gs_init() to create the g_*_softc{} . 73 * The other callbacks (gs_start, gs_next, ...) are invoked 74 * as needed 75 * 76 * g_sched_softc{} is defined in g_sched.h and mostly used here; 77 * g_gsched{}, and the gs_callbacks, are documented in gs_scheduler.h; 78 * g_*_softc{} is defined/implemented by each algorithm (gs_*.c) 79 * 80 * DATA MOVING 81 * When a bio is received on the provider, it goes to the 82 * g_sched_start() which calls gs_start() to initially queue it; 83 * then we call g_sched_dispatch() that loops around gs_next() 84 * to select zero or more bio's to be sent downstream. 85 * 86 * g_sched_dispatch() can also be called as a result of a timeout, 87 * e.g. when doing anticipation or pacing requests. 88 * 89 * When a bio comes back, it goes to g_sched_done() which in turn 90 * calls gs_done(). The latter does any necessary housekeeping in 91 * the scheduling algorithm, and may decide to call g_sched_dispatch() 92 * to send more bio's downstream. 93 * 94 * If an algorithm needs per-flow queues, these are created 95 * calling gs_init_class() and destroyed with gs_fini_class(), 96 * and they are also inserted in the hash table implemented in 97 * the g_sched_softc{} 98 * 99 * If an algorithm is replaced, or a transparently-inserted node is 100 * removed with "geom sched destroy", we need to remove all references 101 * to the g_*_softc{} and g_sched_softc from the bio's still in 102 * the scheduler. g_sched_forced_dispatch() helps doing this. 103 * XXX need to explain better. 104 */ 105 106#include <sys/cdefs.h> 107#include <sys/param.h> 108#include <sys/systm.h> 109#include <sys/kernel.h> 110#include <sys/module.h> 111#include <sys/lock.h> 112#include <sys/mutex.h> 113#include <sys/bio.h> 114#include <sys/limits.h> 115#include <sys/hash.h> 116#include <sys/sbuf.h> 117#include <sys/sysctl.h> 118#include <sys/malloc.h> 119#include <sys/proc.h> /* we access curthread */ 120#include <geom/geom.h> 121#include "gs_scheduler.h" 122#include "g_sched.h" /* geom hooks */ 123 124/* 125 * Size of the per-geom hash table storing traffic classes. 126 * We may decide to change it at a later time, it has no ABI 127 * implications as it is only used for run-time allocations. 128 */ 129#define G_SCHED_HASH_SIZE 32 130 131static int g_sched_destroy(struct g_geom *gp, boolean_t force); 132static int g_sched_destroy_geom(struct gctl_req *req, 133 struct g_class *mp, struct g_geom *gp); 134static void g_sched_config(struct gctl_req *req, struct g_class *mp, 135 const char *verb); 136static struct g_geom *g_sched_taste(struct g_class *mp, 137 struct g_provider *pp, int flags __unused); 138static void g_sched_dumpconf(struct sbuf *sb, const char *indent, 139 struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp); 140static void g_sched_init(struct g_class *mp); 141static void g_sched_fini(struct g_class *mp); 142static int g_sched_ioctl(struct g_provider *pp, u_long cmd, void *data, 143 int fflag, struct thread *td); 144 145struct g_class g_sched_class = { 146 .name = G_SCHED_CLASS_NAME, 147 .version = G_VERSION, 148 .ctlreq = g_sched_config, 149 .taste = g_sched_taste, 150 .destroy_geom = g_sched_destroy_geom, 151 .init = g_sched_init, 152 .ioctl = g_sched_ioctl, 153 .fini = g_sched_fini 154}; 155 156MALLOC_DEFINE(M_GEOM_SCHED, "GEOM_SCHED", "Geom schedulers data structures"); 157 158/* 159 * Global variables describing the state of the geom_sched module. 160 * There is only one static instance of this structure. 161 */ 162LIST_HEAD(gs_list, g_gsched); /* type, link field */ 163struct geom_sched_vars { 164 struct mtx gs_mtx; 165 struct gs_list gs_scheds; /* list of algorithms */ 166 u_int gs_debug; 167 u_int gs_sched_count; /* how many algorithms ? */ 168 u_int gs_patched; /* g_io_request was patched */ 169 170 u_int gs_initialized; 171 u_int gs_expire_secs; /* expiration of hash entries */ 172 173 struct bio_queue_head gs_pending; 174 u_int gs_npending; 175 176 /* The following are for stats, usually protected by gs_mtx. */ 177 u_long gs_requests; /* total requests */ 178 u_long gs_done; /* total done */ 179 u_int gs_in_flight; /* requests in flight */ 180 u_int gs_writes_in_flight; 181 u_int gs_bytes_in_flight; 182 u_int gs_write_bytes_in_flight; 183 184 char gs_names[256]; /* names of schedulers */ 185}; 186 187static struct geom_sched_vars me = { 188 .gs_expire_secs = 10, 189}; 190 191SYSCTL_DECL(_kern_geom); 192SYSCTL_NODE(_kern_geom, OID_AUTO, sched, CTLFLAG_RW, 0, 193 "GEOM_SCHED stuff"); 194 195SYSCTL_UINT(_kern_geom_sched, OID_AUTO, in_flight_wb, CTLFLAG_RD, 196 &me.gs_write_bytes_in_flight, 0, "Write bytes in flight"); 197 198SYSCTL_UINT(_kern_geom_sched, OID_AUTO, in_flight_b, CTLFLAG_RD, 199 &me.gs_bytes_in_flight, 0, "Bytes in flight"); 200 201SYSCTL_UINT(_kern_geom_sched, OID_AUTO, in_flight_w, CTLFLAG_RD, 202 &me.gs_writes_in_flight, 0, "Write Requests in flight"); 203 204SYSCTL_UINT(_kern_geom_sched, OID_AUTO, in_flight, CTLFLAG_RD, 205 &me.gs_in_flight, 0, "Requests in flight"); 206 207SYSCTL_ULONG(_kern_geom_sched, OID_AUTO, done, CTLFLAG_RD, 208 &me.gs_done, 0, "Total done"); 209 210SYSCTL_ULONG(_kern_geom_sched, OID_AUTO, requests, CTLFLAG_RD, 211 &me.gs_requests, 0, "Total requests"); 212 213SYSCTL_STRING(_kern_geom_sched, OID_AUTO, algorithms, CTLFLAG_RD, 214 &me.gs_names, 0, "Algorithm names"); 215 216SYSCTL_UINT(_kern_geom_sched, OID_AUTO, alg_count, CTLFLAG_RD, 217 &me.gs_sched_count, 0, "Number of algorithms"); 218 219SYSCTL_UINT(_kern_geom_sched, OID_AUTO, debug, CTLFLAG_RW, 220 &me.gs_debug, 0, "Debug level"); 221 222SYSCTL_UINT(_kern_geom_sched, OID_AUTO, expire_secs, CTLFLAG_RW, 223 &me.gs_expire_secs, 0, "Expire time in seconds"); 224 225/* 226 * g_sched calls the scheduler algorithms with this lock held. 227 * The locking functions are exposed so the scheduler algorithms can also 228 * protect themselves e.g. when running a callout handler. 229 */ 230void 231g_sched_lock(struct g_geom *gp) 232{ 233 struct g_sched_softc *sc = gp->softc; 234 235 mtx_lock(&sc->sc_mtx); 236} 237 238void 239g_sched_unlock(struct g_geom *gp) 240{ 241 struct g_sched_softc *sc = gp->softc; 242 243 mtx_unlock(&sc->sc_mtx); 244} 245 246/* 247 * Support functions to handle references to the module, 248 * which are coming from devices using this scheduler. 249 */ 250static inline void 251g_gsched_ref(struct g_gsched *gsp) 252{ 253 254 atomic_add_int(&gsp->gs_refs, 1); 255} 256 257static inline void 258g_gsched_unref(struct g_gsched *gsp) 259{ 260 261 atomic_add_int(&gsp->gs_refs, -1); 262} 263 264/* 265 * Update the stats when this request is done. 266 */ 267static void 268g_sched_update_stats(struct bio *bio) 269{ 270 271 me.gs_done++; 272 me.gs_in_flight--; 273 me.gs_bytes_in_flight -= bio->bio_length; 274 if (bio->bio_cmd == BIO_WRITE) { 275 me.gs_writes_in_flight--; 276 me.gs_write_bytes_in_flight -= bio->bio_length; 277 } 278} 279 280/* 281 * Dispatch any pending request. 282 */ 283static void 284g_sched_forced_dispatch(struct g_geom *gp) 285{ 286 struct g_sched_softc *sc = gp->softc; 287 struct g_gsched *gsp = sc->sc_gsched; 288 struct bio *bp; 289 290 KASSERT(mtx_owned(&sc->sc_mtx), 291 ("sc_mtx not owned during forced dispatch")); 292 293 while ((bp = gsp->gs_next(sc->sc_data, 1)) != NULL) 294 g_io_request(bp, LIST_FIRST(&gp->consumer)); 295} 296 297/* 298 * The main dispatch loop, called either here after the start 299 * routine, or by scheduling algorithms when they receive a timeout 300 * or a 'done' notification. Does not share code with the forced 301 * dispatch path, since the gs_done() callback can call us. 302 */ 303void 304g_sched_dispatch(struct g_geom *gp) 305{ 306 struct g_sched_softc *sc = gp->softc; 307 struct g_gsched *gsp = sc->sc_gsched; 308 struct bio *bp; 309 310 KASSERT(mtx_owned(&sc->sc_mtx), ("sc_mtx not owned during dispatch")); 311 312 if ((sc->sc_flags & G_SCHED_FLUSHING)) 313 return; 314 315 while ((bp = gsp->gs_next(sc->sc_data, 0)) != NULL) 316 g_io_request(bp, LIST_FIRST(&gp->consumer)); 317} 318 319/* 320 * Recent (8.0 and above) versions of FreeBSD have support to 321 * register classifiers of disk requests. The classifier is 322 * invoked by g_io_request(), and stores the information into 323 * bp->bio_classifier1. 324 * 325 * Support for older versions, which is left here only for 326 * documentation purposes, relies on two hacks: 327 * 1. classification info is written into the bio_caller1 328 * field of the topmost node in the bio chain. This field 329 * is rarely used, but this module is incompatible with 330 * those that use bio_caller1 for other purposes, 331 * such as ZFS and gjournal; 332 * 2. g_io_request() is patched in-memory when the module is 333 * loaded, so that the function calls a classifier as its 334 * first thing. g_io_request() is restored when the module 335 * is unloaded. This functionality is only supported for 336 * x86 and amd64, other architectures need source code changes. 337 */ 338 339/* 340 * Lookup the identity of the issuer of the original request. 341 * In the current implementation we use the curthread of the 342 * issuer, but different mechanisms may be implemented later 343 * so we do not make assumptions on the return value which for 344 * us is just an opaque identifier. 345 */ 346 347static inline u_long 348g_sched_classify(struct bio *bp) 349{ 350 351 /* we have classifier fields in the struct bio */ 352 return ((u_long)bp->bio_classifier1); 353} 354 355/* Return the hash chain for the given key. */ 356static inline struct g_hash * 357g_sched_hash(struct g_sched_softc *sc, u_long key) 358{ 359 360 return (&sc->sc_hash[key & sc->sc_mask]); 361} 362 363/* 364 * Helper function for the children classes, which takes 365 * a geom and a bio and returns the private descriptor 366 * associated to the request. This involves fetching 367 * the classification field and [al]locating the 368 * corresponding entry in the hash table. 369 */ 370void * 371g_sched_get_class(struct g_geom *gp, struct bio *bp) 372{ 373 struct g_sched_softc *sc; 374 struct g_sched_class *gsc; 375 struct g_gsched *gsp; 376 struct g_hash *bucket; 377 u_long key; 378 379 sc = gp->softc; 380 key = g_sched_classify(bp); 381 bucket = g_sched_hash(sc, key); 382 LIST_FOREACH(gsc, bucket, gsc_clist) { 383 if (key == gsc->gsc_key) { 384 gsc->gsc_refs++; 385 return (gsc->gsc_priv); 386 } 387 } 388 389 gsp = sc->sc_gsched; 390 gsc = malloc(sizeof(*gsc) + gsp->gs_priv_size, 391 M_GEOM_SCHED, M_NOWAIT | M_ZERO); 392 if (!gsc) 393 return (NULL); 394 395 if (gsp->gs_init_class(sc->sc_data, gsc->gsc_priv)) { 396 free(gsc, M_GEOM_SCHED); 397 return (NULL); 398 } 399 400 gsc->gsc_refs = 2; /* 1 for the hash table, 1 for the caller. */ 401 gsc->gsc_key = key; 402 LIST_INSERT_HEAD(bucket, gsc, gsc_clist); 403 404 gsc->gsc_expire = ticks + me.gs_expire_secs * hz; 405 406 return (gsc->gsc_priv); 407} 408 409/* 410 * Release a reference to the per-client descriptor, 411 */ 412void 413g_sched_put_class(struct g_geom *gp, void *priv) 414{ 415 struct g_sched_class *gsc; 416 struct g_sched_softc *sc; 417 418 gsc = g_sched_priv2class(priv); 419 gsc->gsc_expire = ticks + me.gs_expire_secs * hz; 420 421 if (--gsc->gsc_refs > 0) 422 return; 423 424 sc = gp->softc; 425 sc->sc_gsched->gs_fini_class(sc->sc_data, priv); 426 427 LIST_REMOVE(gsc, gsc_clist); 428 free(gsc, M_GEOM_SCHED); 429} 430 431static void 432g_sched_hash_fini(struct g_geom *gp, struct g_hash *hp, u_long mask, 433 struct g_gsched *gsp, void *data) 434{ 435 struct g_sched_class *cp, *cp2; 436 int i; 437 438 if (!hp) 439 return; 440 441 if (data && gsp->gs_hash_unref) 442 gsp->gs_hash_unref(data); 443 444 for (i = 0; i < G_SCHED_HASH_SIZE; i++) { 445 LIST_FOREACH_SAFE(cp, &hp[i], gsc_clist, cp2) 446 g_sched_put_class(gp, cp->gsc_priv); 447 } 448 449 hashdestroy(hp, M_GEOM_SCHED, mask); 450} 451 452static struct g_hash * 453g_sched_hash_init(struct g_gsched *gsp, u_long *mask, int flags) 454{ 455 struct g_hash *hash; 456 457 if (gsp->gs_priv_size == 0) 458 return (NULL); 459 460 hash = hashinit_flags(G_SCHED_HASH_SIZE, M_GEOM_SCHED, mask, flags); 461 462 return (hash); 463} 464 465static void 466g_sched_flush_classes(struct g_geom *gp) 467{ 468 struct g_sched_softc *sc; 469 struct g_sched_class *cp, *cp2; 470 int i; 471 472 sc = gp->softc; 473 474 if (!sc->sc_hash || ticks - sc->sc_flush_ticks <= 0) 475 return; 476 477 for (i = 0; i < G_SCHED_HASH_SIZE; i++) { 478 LIST_FOREACH_SAFE(cp, &sc->sc_hash[i], gsc_clist, cp2) { 479 if (cp->gsc_refs == 1 && ticks - cp->gsc_expire > 0) 480 g_sched_put_class(gp, cp->gsc_priv); 481 } 482 } 483 484 sc->sc_flush_ticks = ticks + me.gs_expire_secs * hz; 485} 486 487/* 488 * Wait for the completion of any outstanding request. To ensure 489 * that this does not take forever the caller has to make sure that 490 * no new request enter the scehduler before calling us. 491 * 492 * Must be called with the gp mutex held and topology locked. 493 */ 494static int 495g_sched_wait_pending(struct g_geom *gp) 496{ 497 struct g_sched_softc *sc = gp->softc; 498 int endticks = ticks + hz; 499 500 g_topology_assert(); 501 502 while (sc->sc_pending && endticks - ticks >= 0) 503 msleep(gp, &sc->sc_mtx, 0, "sched_wait_pending", hz / 4); 504 505 return (sc->sc_pending ? ETIMEDOUT : 0); 506} 507 508static int 509g_sched_remove_locked(struct g_geom *gp, struct g_gsched *gsp) 510{ 511 struct g_sched_softc *sc = gp->softc; 512 int error; 513 514 /* Set the flushing flag: new bios will not enter the scheduler. */ 515 sc->sc_flags |= G_SCHED_FLUSHING; 516 517 g_sched_forced_dispatch(gp); 518 error = g_sched_wait_pending(gp); 519 if (error) 520 goto failed; 521 522 /* No more requests pending or in flight from the old gsp. */ 523 524 g_sched_hash_fini(gp, sc->sc_hash, sc->sc_mask, gsp, sc->sc_data); 525 sc->sc_hash = NULL; 526 527 /* 528 * Avoid deadlock here by releasing the gp mutex and reacquiring 529 * it once done. It should be safe, since no reconfiguration or 530 * destruction can take place due to the geom topology lock; no 531 * new request can use the current sc_data since we flagged the 532 * geom as being flushed. 533 */ 534 g_sched_unlock(gp); 535 gsp->gs_fini(sc->sc_data); 536 g_sched_lock(gp); 537 538 sc->sc_gsched = NULL; 539 sc->sc_data = NULL; 540 g_gsched_unref(gsp); 541 542failed: 543 sc->sc_flags &= ~G_SCHED_FLUSHING; 544 545 return (error); 546} 547 548static int 549g_sched_remove(struct g_geom *gp, struct g_gsched *gsp) 550{ 551 int error; 552 553 g_sched_lock(gp); 554 error = g_sched_remove_locked(gp, gsp); /* gsp is surely non-null */ 555 g_sched_unlock(gp); 556 557 return (error); 558} 559 560/* 561 * Support function for create/taste -- locate the desired 562 * algorithm and grab a reference to it. 563 */ 564static struct g_gsched * 565g_gsched_find(const char *name) 566{ 567 struct g_gsched *gsp = NULL; 568 569 mtx_lock(&me.gs_mtx); 570 LIST_FOREACH(gsp, &me.gs_scheds, glist) { 571 if (strcmp(name, gsp->gs_name) == 0) { 572 g_gsched_ref(gsp); 573 break; 574 } 575 } 576 mtx_unlock(&me.gs_mtx); 577 578 return (gsp); 579} 580 581/* 582 * Rebuild the list of scheduler names. 583 * To be called with me.gs_mtx lock held. 584 */ 585static void 586g_gsched_build_names(struct g_gsched *gsp) 587{ 588 int pos, l; 589 struct g_gsched *cur; 590 591 pos = 0; 592 LIST_FOREACH(cur, &me.gs_scheds, glist) { 593 l = strlen(cur->gs_name); 594 if (l + pos + 1 + 1 < sizeof(me.gs_names)) { 595 if (pos != 0) 596 me.gs_names[pos++] = ' '; 597 strcpy(me.gs_names + pos, cur->gs_name); 598 pos += l; 599 } 600 } 601 me.gs_names[pos] = '\0'; 602} 603 604/* 605 * Register or unregister individual scheduling algorithms. 606 */ 607static int 608g_gsched_register(struct g_gsched *gsp) 609{ 610 struct g_gsched *cur; 611 int error = 0; 612 613 mtx_lock(&me.gs_mtx); 614 LIST_FOREACH(cur, &me.gs_scheds, glist) { 615 if (strcmp(gsp->gs_name, cur->gs_name) == 0) 616 break; 617 } 618 if (cur != NULL) { 619 G_SCHED_DEBUG(0, "A scheduler named %s already" 620 "exists.", gsp->gs_name); 621 error = EEXIST; 622 } else { 623 LIST_INSERT_HEAD(&me.gs_scheds, gsp, glist); 624 gsp->gs_refs = 1; 625 me.gs_sched_count++; 626 g_gsched_build_names(gsp); 627 } 628 mtx_unlock(&me.gs_mtx); 629 630 return (error); 631} 632 633struct g_gsched_unregparm { 634 struct g_gsched *gup_gsp; 635 int gup_error; 636}; 637 638static void 639g_gsched_unregister(void *arg, int flag) 640{ 641 struct g_gsched_unregparm *parm = arg; 642 struct g_gsched *gsp = parm->gup_gsp, *cur, *tmp; 643 struct g_sched_softc *sc; 644 struct g_geom *gp, *gp_tmp; 645 int error; 646 647 parm->gup_error = 0; 648 649 g_topology_assert(); 650 651 if (flag == EV_CANCEL) 652 return; 653 654 mtx_lock(&me.gs_mtx); 655 656 LIST_FOREACH_SAFE(gp, &g_sched_class.geom, geom, gp_tmp) { 657 if (gp->class != &g_sched_class) 658 continue; /* Should not happen. */ 659 660 sc = gp->softc; 661 if (sc->sc_gsched == gsp) { 662 error = g_sched_remove(gp, gsp); 663 if (error) 664 goto failed; 665 } 666 } 667 668 LIST_FOREACH_SAFE(cur, &me.gs_scheds, glist, tmp) { 669 if (cur != gsp) 670 continue; 671 672 if (gsp->gs_refs != 1) { 673 G_SCHED_DEBUG(0, "%s still in use.", 674 gsp->gs_name); 675 parm->gup_error = EBUSY; 676 } else { 677 LIST_REMOVE(gsp, glist); 678 me.gs_sched_count--; 679 g_gsched_build_names(gsp); 680 } 681 break; 682 } 683 684 if (cur == NULL) { 685 G_SCHED_DEBUG(0, "%s not registered.", gsp->gs_name); 686 parm->gup_error = ENOENT; 687 } 688 689failed: 690 mtx_unlock(&me.gs_mtx); 691} 692 693static inline void 694g_gsched_global_init(void) 695{ 696 697 if (!me.gs_initialized) { 698 G_SCHED_DEBUG(0, "Initializing global data."); 699 mtx_init(&me.gs_mtx, "gsched", NULL, MTX_DEF); 700 LIST_INIT(&me.gs_scheds); 701 bioq_init(&me.gs_pending); 702 me.gs_initialized = 1; 703 } 704} 705 706/* 707 * Module event called when a scheduling algorithm module is loaded or 708 * unloaded. 709 */ 710int 711g_gsched_modevent(module_t mod, int cmd, void *arg) 712{ 713 struct g_gsched *gsp = arg; 714 struct g_gsched_unregparm parm; 715 int error; 716 717 G_SCHED_DEBUG(0, "Modevent %d.", cmd); 718 719 /* 720 * If the module is loaded at boot, the geom thread that calls 721 * g_sched_init() might actually run after g_gsched_modevent(), 722 * so make sure that the module is properly initialized. 723 */ 724 g_gsched_global_init(); 725 726 error = EOPNOTSUPP; 727 switch (cmd) { 728 case MOD_LOAD: 729 error = g_gsched_register(gsp); 730 G_SCHED_DEBUG(0, "Loaded module %s error %d.", 731 gsp->gs_name, error); 732 if (error == 0) 733 g_retaste(&g_sched_class); 734 break; 735 736 case MOD_UNLOAD: 737 parm.gup_gsp = gsp; 738 parm.gup_error = 0; 739 740 error = g_waitfor_event(g_gsched_unregister, 741 &parm, M_WAITOK, NULL); 742 if (error == 0) 743 error = parm.gup_error; 744 G_SCHED_DEBUG(0, "Unloaded module %s error %d.", 745 gsp->gs_name, error); 746 break; 747 } 748 749 return (error); 750} 751 752#ifdef KTR 753#define TRC_BIO_EVENT(e, bp) g_sched_trace_bio_ ## e (bp) 754 755static inline char 756g_sched_type(struct bio *bp) 757{ 758 759 if (bp->bio_cmd == BIO_READ) 760 return ('R'); 761 else if (bp->bio_cmd == BIO_WRITE) 762 return ('W'); 763 return ('U'); 764} 765 766static inline void 767g_sched_trace_bio_START(struct bio *bp) 768{ 769 770 CTR5(KTR_GSCHED, "S %lu %c %lu/%lu %lu", g_sched_classify(bp), 771 g_sched_type(bp), bp->bio_offset / ULONG_MAX, 772 bp->bio_offset, bp->bio_length); 773} 774 775static inline void 776g_sched_trace_bio_DONE(struct bio *bp) 777{ 778 779 CTR5(KTR_GSCHED, "D %lu %c %lu/%lu %lu", g_sched_classify(bp), 780 g_sched_type(bp), bp->bio_offset / ULONG_MAX, 781 bp->bio_offset, bp->bio_length); 782} 783#else /* !KTR */ 784#define TRC_BIO_EVENT(e, bp) 785#endif /* !KTR */ 786 787/* 788 * g_sched_done() and g_sched_start() dispatch the geom requests to 789 * the scheduling algorithm in use. 790 */ 791static void 792g_sched_done(struct bio *bio) 793{ 794 struct g_geom *gp = bio->bio_caller2; 795 struct g_sched_softc *sc = gp->softc; 796 797 TRC_BIO_EVENT(DONE, bio); 798 799 KASSERT(bio->bio_caller1, ("null bio_caller1 in g_sched_done")); 800 801 g_sched_lock(gp); 802 803 g_sched_update_stats(bio); 804 sc->sc_gsched->gs_done(sc->sc_data, bio); 805 if (!--sc->sc_pending) 806 wakeup(gp); 807 808 g_sched_flush_classes(gp); 809 g_sched_unlock(gp); 810 811 g_std_done(bio); 812} 813 814static void 815g_sched_start(struct bio *bp) 816{ 817 struct g_geom *gp = bp->bio_to->geom; 818 struct g_sched_softc *sc = gp->softc; 819 struct bio *cbp; 820 821 TRC_BIO_EVENT(START, bp); 822 G_SCHED_LOGREQ(bp, "Request received."); 823 824 cbp = g_clone_bio(bp); 825 if (cbp == NULL) { 826 g_io_deliver(bp, ENOMEM); 827 return; 828 } 829 cbp->bio_done = g_sched_done; 830 cbp->bio_to = LIST_FIRST(&gp->provider); 831 KASSERT(cbp->bio_to != NULL, ("NULL provider")); 832 833 /* We only schedule reads and writes. */ 834 if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE) 835 goto bypass; 836 837 G_SCHED_LOGREQ(cbp, "Sending request."); 838 839 g_sched_lock(gp); 840 /* 841 * Call the algorithm's gs_start to queue the request in the 842 * scheduler. If gs_start fails then pass the request down, 843 * otherwise call g_sched_dispatch() which tries to push 844 * one or more requests down. 845 */ 846 if (!sc->sc_gsched || (sc->sc_flags & G_SCHED_FLUSHING) || 847 sc->sc_gsched->gs_start(sc->sc_data, cbp)) { 848 g_sched_unlock(gp); 849 goto bypass; 850 } 851 /* 852 * We use bio_caller1 to mark requests that are scheduled 853 * so make sure it is not NULL. 854 */ 855 if (cbp->bio_caller1 == NULL) 856 cbp->bio_caller1 = &me; /* anything not NULL */ 857 858 cbp->bio_caller2 = gp; 859 sc->sc_pending++; 860 861 /* Update general stats. */ 862 me.gs_in_flight++; 863 me.gs_requests++; 864 me.gs_bytes_in_flight += bp->bio_length; 865 if (bp->bio_cmd == BIO_WRITE) { 866 me.gs_writes_in_flight++; 867 me.gs_write_bytes_in_flight += bp->bio_length; 868 } 869 g_sched_dispatch(gp); 870 g_sched_unlock(gp); 871 return; 872 873bypass: 874 cbp->bio_done = g_std_done; 875 cbp->bio_caller1 = NULL; /* not scheduled */ 876 g_io_request(cbp, LIST_FIRST(&gp->consumer)); 877} 878 879/* 880 * The next few functions are the geom glue. 881 */ 882static void 883g_sched_orphan(struct g_consumer *cp) 884{ 885 886 g_topology_assert(); 887 g_sched_destroy(cp->geom, 1); 888} 889 890static int 891g_sched_access(struct g_provider *pp, int dr, int dw, int de) 892{ 893 struct g_geom *gp; 894 struct g_consumer *cp; 895 int error; 896 897 gp = pp->geom; 898 cp = LIST_FIRST(&gp->consumer); 899 error = g_access(cp, dr, dw, de); 900 901 return (error); 902} 903 904static void 905g_sched_temporary_start(struct bio *bio) 906{ 907 908 mtx_lock(&me.gs_mtx); 909 me.gs_npending++; 910 bioq_disksort(&me.gs_pending, bio); 911 mtx_unlock(&me.gs_mtx); 912} 913 914static void 915g_sched_flush_pending(g_start_t *start) 916{ 917 struct bio *bp; 918 919 while ((bp = bioq_takefirst(&me.gs_pending))) 920 start(bp); 921} 922 923static int 924g_insert_proxy(struct g_geom *gp, struct g_provider *newpp, 925 struct g_geom *dstgp, struct g_provider *pp, struct g_consumer *cp) 926{ 927 struct g_sched_softc *sc = gp->softc; 928 g_start_t *saved_start, *flush = g_sched_start; 929 int error = 0, endticks = ticks + hz; 930 931 g_cancel_event(newpp); /* prevent taste() */ 932 /* copy private fields */ 933 newpp->private = pp->private; 934 newpp->index = pp->index; 935 936 /* Queue all the early requests coming for us. */ 937 me.gs_npending = 0; 938 saved_start = pp->geom->start; 939 dstgp->start = g_sched_temporary_start; 940 941 while (pp->nstart - pp->nend != me.gs_npending && 942 endticks - ticks >= 0) 943 tsleep(pp, PRIBIO, "-", hz/10); 944 945 if (pp->nstart - pp->nend != me.gs_npending) { 946 flush = saved_start; 947 error = ETIMEDOUT; 948 goto fail; 949 } 950 951 /* link pp to this geom */ 952 LIST_REMOVE(pp, provider); 953 pp->geom = gp; 954 LIST_INSERT_HEAD(&gp->provider, pp, provider); 955 956 /* 957 * replicate the counts from the parent in the 958 * new provider and consumer nodes 959 */ 960 cp->acr = newpp->acr = pp->acr; 961 cp->acw = newpp->acw = pp->acw; 962 cp->ace = newpp->ace = pp->ace; 963 sc->sc_flags |= G_SCHED_PROXYING; 964 965fail: 966 dstgp->start = saved_start; 967 968 g_sched_flush_pending(flush); 969 970 return (error); 971} 972 973/* 974 * Create a geom node for the device passed as *pp. 975 * If successful, add a reference to this gsp. 976 */ 977static int 978g_sched_create(struct gctl_req *req, struct g_class *mp, 979 struct g_provider *pp, struct g_gsched *gsp, int proxy) 980{ 981 struct g_sched_softc *sc = NULL; 982 struct g_geom *gp, *dstgp; 983 struct g_provider *newpp = NULL; 984 struct g_consumer *cp = NULL; 985 char name[64]; 986 int error; 987 988 g_topology_assert(); 989 990 snprintf(name, sizeof(name), "%s%s", pp->name, G_SCHED_SUFFIX); 991 LIST_FOREACH(gp, &mp->geom, geom) { 992 if (strcmp(gp->name, name) == 0) { 993 gctl_error(req, "Geom %s already exists.", 994 name); 995 return (EEXIST); 996 } 997 } 998 999 gp = g_new_geomf(mp, "%s", name); 1000 dstgp = proxy ? pp->geom : gp; /* where do we link the provider */ 1001 1002 sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); 1003 sc->sc_gsched = gsp; 1004 sc->sc_data = gsp->gs_init(gp); 1005 if (sc->sc_data == NULL) { 1006 error = ENOMEM; 1007 goto fail; 1008 } 1009 1010 sc->sc_hash = g_sched_hash_init(gsp, &sc->sc_mask, HASH_WAITOK); 1011 1012 /* 1013 * Do not initialize the flush mechanism, will be initialized 1014 * on the first insertion on the hash table. 1015 */ 1016 1017 mtx_init(&sc->sc_mtx, "g_sched_mtx", NULL, MTX_DEF); 1018 1019 gp->softc = sc; 1020 gp->start = g_sched_start; 1021 gp->orphan = g_sched_orphan; 1022 gp->access = g_sched_access; 1023 gp->dumpconf = g_sched_dumpconf; 1024 1025 newpp = g_new_providerf(dstgp, "%s", gp->name); 1026 newpp->mediasize = pp->mediasize; 1027 newpp->sectorsize = pp->sectorsize; 1028 1029 cp = g_new_consumer(gp); 1030 error = g_attach(cp, proxy ? newpp : pp); 1031 if (error != 0) { 1032 gctl_error(req, "Cannot attach to provider %s.", 1033 pp->name); 1034 goto fail; 1035 } 1036 1037 g_error_provider(newpp, 0); 1038 if (proxy) { 1039 error = g_insert_proxy(gp, newpp, dstgp, pp, cp); 1040 if (error) 1041 goto fail; 1042 } 1043 G_SCHED_DEBUG(0, "Device %s created.", gp->name); 1044 1045 g_gsched_ref(gsp); 1046 1047 return (0); 1048 1049fail: 1050 if (cp != NULL) { 1051 if (cp->provider != NULL) 1052 g_detach(cp); 1053 g_destroy_consumer(cp); 1054 } 1055 if (newpp != NULL) 1056 g_destroy_provider(newpp); 1057 if (sc->sc_hash) 1058 g_sched_hash_fini(gp, sc->sc_hash, sc->sc_mask, 1059 gsp, sc->sc_data); 1060 if (sc->sc_data) 1061 gsp->gs_fini(sc->sc_data); 1062 g_free(gp->softc); 1063 g_destroy_geom(gp); 1064 1065 return (error); 1066} 1067 1068/* 1069 * Support for dynamic switching of scheduling algorithms. 1070 * First initialize the data structures for the new algorithm, 1071 * then call g_sched_remove_locked() to flush all references 1072 * to the old one, finally link the new algorithm. 1073 */ 1074static int 1075g_sched_change_algo(struct gctl_req *req, struct g_class *mp, 1076 struct g_provider *pp, struct g_gsched *gsp) 1077{ 1078 struct g_sched_softc *sc; 1079 struct g_geom *gp; 1080 struct g_hash *newh; 1081 void *data; 1082 u_long mask; 1083 int error = 0; 1084 1085 gp = pp->geom; 1086 sc = gp->softc; 1087 1088 data = gsp->gs_init(gp); 1089 if (data == NULL) 1090 return (ENOMEM); 1091 1092 newh = g_sched_hash_init(gsp, &mask, HASH_WAITOK); 1093 if (gsp->gs_priv_size && !newh) { 1094 error = ENOMEM; 1095 goto fail; 1096 } 1097 1098 g_sched_lock(gp); 1099 if (sc->sc_gsched) { /* can be NULL in some cases */ 1100 error = g_sched_remove_locked(gp, sc->sc_gsched); 1101 if (error) 1102 goto fail; 1103 } 1104 1105 g_gsched_ref(gsp); 1106 sc->sc_gsched = gsp; 1107 sc->sc_data = data; 1108 sc->sc_hash = newh; 1109 sc->sc_mask = mask; 1110 1111 g_sched_unlock(gp); 1112 1113 return (0); 1114 1115fail: 1116 if (newh) 1117 g_sched_hash_fini(gp, newh, mask, gsp, data); 1118 1119 if (data) 1120 gsp->gs_fini(data); 1121 1122 g_sched_unlock(gp); 1123 1124 return (error); 1125} 1126 1127/* 1128 * Stop the request flow directed to the proxy, redirecting the new 1129 * requests to the me.gs_pending queue. 1130 */ 1131static struct g_provider * 1132g_detach_proxy(struct g_geom *gp) 1133{ 1134 struct g_consumer *cp; 1135 struct g_provider *pp, *newpp; 1136 1137 do { 1138 pp = LIST_FIRST(&gp->provider); 1139 if (pp == NULL) 1140 break; 1141 cp = LIST_FIRST(&gp->consumer); 1142 if (cp == NULL) 1143 break; 1144 newpp = cp->provider; 1145 if (newpp == NULL) 1146 break; 1147 1148 me.gs_npending = 0; 1149 pp->geom->start = g_sched_temporary_start; 1150 1151 return (pp); 1152 } while (0); 1153 printf("%s error detaching proxy %s\n", __FUNCTION__, gp->name); 1154 1155 return (NULL); 1156} 1157 1158static void 1159g_sched_blackhole(struct bio *bp) 1160{ 1161 1162 g_io_deliver(bp, ENXIO); 1163} 1164 1165static inline void 1166g_reparent_provider(struct g_provider *pp, struct g_geom *gp, 1167 struct g_provider *newpp) 1168{ 1169 1170 LIST_REMOVE(pp, provider); 1171 if (newpp) { 1172 pp->private = newpp->private; 1173 pp->index = newpp->index; 1174 } 1175 pp->geom = gp; 1176 LIST_INSERT_HEAD(&gp->provider, pp, provider); 1177} 1178 1179static inline void 1180g_unproxy_provider(struct g_provider *oldpp, struct g_provider *newpp) 1181{ 1182 struct g_geom *gp = oldpp->geom; 1183 1184 g_reparent_provider(oldpp, newpp->geom, newpp); 1185 1186 /* 1187 * Hackish: let the system destroy the old provider for us, just 1188 * in case someone attached a consumer to it, in which case a 1189 * direct call to g_destroy_provider() would not work. 1190 */ 1191 g_reparent_provider(newpp, gp, NULL); 1192} 1193 1194/* 1195 * Complete the proxy destruction, linking the old provider to its 1196 * original geom, and destroying the proxy provider. Also take care 1197 * of issuing the pending requests collected in me.gs_pending (if any). 1198 */ 1199static int 1200g_destroy_proxy(struct g_geom *gp, struct g_provider *oldpp) 1201{ 1202 struct g_consumer *cp; 1203 struct g_provider *newpp; 1204 1205 do { 1206 cp = LIST_FIRST(&gp->consumer); 1207 if (cp == NULL) 1208 break; 1209 newpp = cp->provider; 1210 if (newpp == NULL) 1211 break; 1212 1213 /* Relink the provider to its original geom. */ 1214 g_unproxy_provider(oldpp, newpp); 1215 1216 /* Detach consumer from provider, and destroy provider. */ 1217 cp->acr = newpp->acr = 0; 1218 cp->acw = newpp->acw = 0; 1219 cp->ace = newpp->ace = 0; 1220 g_detach(cp); 1221 1222 /* Send the pending bios through the right start function. */ 1223 g_sched_flush_pending(oldpp->geom->start); 1224 1225 return (0); 1226 } while (0); 1227 printf("%s error destroying proxy %s\n", __FUNCTION__, gp->name); 1228 1229 /* We cannot send the pending bios anywhere... */ 1230 g_sched_flush_pending(g_sched_blackhole); 1231 1232 return (EINVAL); 1233} 1234 1235static int 1236g_sched_destroy(struct g_geom *gp, boolean_t force) 1237{ 1238 struct g_provider *pp, *oldpp = NULL; 1239 struct g_sched_softc *sc; 1240 struct g_gsched *gsp; 1241 int error; 1242 1243 g_topology_assert(); 1244 sc = gp->softc; 1245 if (sc == NULL) 1246 return (ENXIO); 1247 if (!(sc->sc_flags & G_SCHED_PROXYING)) { 1248 pp = LIST_FIRST(&gp->provider); 1249 if (pp && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { 1250 const char *msg = force ? 1251 "but we force removal" : "cannot remove"; 1252 1253 G_SCHED_DEBUG(!force, 1254 "Device %s is still open (r%dw%de%d), %s.", 1255 pp->name, pp->acr, pp->acw, pp->ace, msg); 1256 if (!force) 1257 return (EBUSY); 1258 } else { 1259 G_SCHED_DEBUG(0, "Device %s removed.", gp->name); 1260 } 1261 } else 1262 oldpp = g_detach_proxy(gp); 1263 1264 gsp = sc->sc_gsched; 1265 if (gsp) { 1266 /* 1267 * XXX bad hack here: force a dispatch to release 1268 * any reference to the hash table still held by 1269 * the scheduler. 1270 */ 1271 g_sched_lock(gp); 1272 /* 1273 * We are dying here, no new requests should enter 1274 * the scheduler. This is granted by the topolgy, 1275 * either in case we were proxying (new bios are 1276 * being redirected) or not (see the access check 1277 * above). 1278 */ 1279 g_sched_forced_dispatch(gp); 1280 error = g_sched_wait_pending(gp); 1281 1282 if (error) { 1283 /* 1284 * Not all the requests came home: this might happen 1285 * under heavy load, or if we were waiting for any 1286 * bio which is served in the event path (see 1287 * geom_slice.c for an example of how this can 1288 * happen). Try to restore a working configuration 1289 * if we can fail. 1290 */ 1291 if ((sc->sc_flags & G_SCHED_PROXYING) && oldpp) { 1292 g_sched_flush_pending(force ? 1293 g_sched_blackhole : g_sched_start); 1294 } 1295 1296 /* 1297 * In the forced destroy case there is not so much 1298 * we can do, we have pending bios that will call 1299 * g_sched_done() somehow, and we don't want them 1300 * to crash the system using freed memory. We tell 1301 * the user that something went wrong, and leak some 1302 * memory here. 1303 * Note: the callers using force = 1 ignore the 1304 * return value. 1305 */ 1306 if (force) { 1307 G_SCHED_DEBUG(0, "Pending requests while " 1308 " destroying geom, some memory leaked."); 1309 } 1310 1311 return (error); 1312 } 1313 1314 g_sched_unlock(gp); 1315 g_sched_hash_fini(gp, sc->sc_hash, sc->sc_mask, 1316 gsp, sc->sc_data); 1317 sc->sc_hash = NULL; 1318 gsp->gs_fini(sc->sc_data); 1319 g_gsched_unref(gsp); 1320 sc->sc_gsched = NULL; 1321 } else 1322 error = 0; 1323 1324 if ((sc->sc_flags & G_SCHED_PROXYING) && oldpp) { 1325 error = g_destroy_proxy(gp, oldpp); 1326 1327 if (error) { 1328 if (force) { 1329 G_SCHED_DEBUG(0, "Unrecoverable error while " 1330 "destroying a proxy geom, leaking some " 1331 " memory."); 1332 } 1333 1334 return (error); 1335 } 1336 } 1337 1338 mtx_destroy(&sc->sc_mtx); 1339 1340 g_free(gp->softc); 1341 gp->softc = NULL; 1342 g_wither_geom(gp, ENXIO); 1343 1344 return (error); 1345} 1346 1347static int 1348g_sched_destroy_geom(struct gctl_req *req, struct g_class *mp, 1349 struct g_geom *gp) 1350{ 1351 1352 return (g_sched_destroy(gp, 0)); 1353} 1354 1355/* 1356 * Functions related to the classification of requests. 1357 * 1358 * On recent FreeBSD versions (8.0 and above), we store a reference 1359 * to the issuer of a request in bp->bio_classifier1 as soon 1360 * as the bio is posted to the geom queue (and not later, because 1361 * requests are managed by the g_down thread afterwards). 1362 */ 1363 1364/* 1365 * Classifier support for recent FreeBSD versions: we use 1366 * a very simple classifier, only use curthread to tag a request. 1367 * The classifier is registered at module load, and unregistered 1368 * at module unload. 1369 */ 1370static int 1371g_sched_tag(void *arg, struct bio *bp) 1372{ 1373 1374 bp->bio_classifier1 = curthread; 1375 return (1); 1376} 1377 1378static struct g_classifier_hook g_sched_classifier = { 1379 .func = g_sched_tag, 1380}; 1381 1382static inline void 1383g_classifier_ini(void) 1384{ 1385 1386 g_register_classifier(&g_sched_classifier); 1387} 1388 1389static inline void 1390g_classifier_fini(void) 1391{ 1392 1393 g_unregister_classifier(&g_sched_classifier); 1394} 1395 1396static void 1397g_sched_init(struct g_class *mp) 1398{ 1399 1400 g_gsched_global_init(); 1401 1402 G_SCHED_DEBUG(0, "Loading: mp = %p, g_sched_class = %p.", 1403 mp, &g_sched_class); 1404 1405 /* Patch g_io_request to store classification info in the bio. */ 1406 g_classifier_ini(); 1407} 1408 1409static void 1410g_sched_fini(struct g_class *mp) 1411{ 1412 1413 g_classifier_fini(); 1414 1415 G_SCHED_DEBUG(0, "Unloading..."); 1416 1417 KASSERT(LIST_EMPTY(&me.gs_scheds), ("still registered schedulers")); 1418 mtx_destroy(&me.gs_mtx); 1419} 1420 1421static int 1422g_sched_ioctl(struct g_provider *pp, u_long cmd, void *data, int fflag, 1423 struct thread *td) 1424{ 1425 struct g_consumer *cp; 1426 struct g_geom *gp; 1427 1428 cp = LIST_FIRST(&pp->geom->consumer); 1429 if (cp == NULL) 1430 return (ENOIOCTL); 1431 gp = cp->provider->geom; 1432 if (gp->ioctl == NULL) 1433 return (ENOIOCTL); 1434 return (gp->ioctl(cp->provider, cmd, data, fflag, td)); 1435} 1436 1437/* 1438 * Read the i-th argument for a request, skipping the /dev/ 1439 * prefix if present. 1440 */ 1441static const char * 1442g_sched_argi(struct gctl_req *req, int i) 1443{ 1444 static const char *dev_prefix = "/dev/"; 1445 const char *name; 1446 char param[16]; 1447 int l = strlen(dev_prefix); 1448 1449 snprintf(param, sizeof(param), "arg%d", i); 1450 name = gctl_get_asciiparam(req, param); 1451 if (name == NULL) 1452 gctl_error(req, "No 'arg%d' argument", i); 1453 else if (strncmp(name, dev_prefix, l) == 0) 1454 name += l; 1455 return (name); 1456} 1457 1458/* 1459 * Fetch nargs and do appropriate checks. 1460 */ 1461static int 1462g_sched_get_nargs(struct gctl_req *req) 1463{ 1464 int *nargs; 1465 1466 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 1467 if (nargs == NULL) { 1468 gctl_error(req, "No 'nargs' argument"); 1469 return (0); 1470 } 1471 if (*nargs <= 0) 1472 gctl_error(req, "Missing device(s)."); 1473 return (*nargs); 1474} 1475 1476/* 1477 * Check whether we should add the class on certain volumes when 1478 * this geom is created. Right now this is under control of a kenv 1479 * variable containing the names of all devices that we care about. 1480 * Probably we should only support transparent insertion as the 1481 * preferred mode of operation. 1482 */ 1483static struct g_geom * 1484g_sched_taste(struct g_class *mp, struct g_provider *pp, 1485 int flags __unused) 1486{ 1487 struct g_gsched *gsp = NULL; /* the . algorithm we want */ 1488 const char *s; /* generic string pointer */ 1489 const char *taste_names; /* devices we like */ 1490 int l; 1491 1492 g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, 1493 mp->name, pp->name); 1494 g_topology_assert(); 1495 1496 G_SCHED_DEBUG(2, "Tasting %s.", pp->name); 1497 1498 do { 1499 /* do not taste on ourselves */ 1500 if (pp->geom->class == mp) 1501 break; 1502 1503 taste_names = kern_getenv("geom.sched.taste"); 1504 if (taste_names == NULL) 1505 break; 1506 1507 l = strlen(pp->name); 1508 for (s = taste_names; *s && 1509 (s = strstr(s, pp->name)); s++) { 1510 /* further checks for an exact match */ 1511 if ( (s == taste_names || s[-1] == ' ') && 1512 (s[l] == '\0' || s[l] == ' ') ) 1513 break; 1514 } 1515 if (s == NULL) 1516 break; 1517 G_SCHED_DEBUG(0, "Attach device %s match [%s]\n", 1518 pp->name, s); 1519 1520 /* look up the provider name in the list */ 1521 s = kern_getenv("geom.sched.algo"); 1522 if (s == NULL) 1523 s = "rr"; 1524 1525 gsp = g_gsched_find(s); /* also get a reference */ 1526 if (gsp == NULL) { 1527 G_SCHED_DEBUG(0, "Bad '%s' algorithm.", s); 1528 break; 1529 } 1530 1531 /* XXX create with 1 as last argument ? */ 1532 g_sched_create(NULL, mp, pp, gsp, 0); 1533 g_gsched_unref(gsp); 1534 } while (0); 1535 return NULL; 1536} 1537 1538static void 1539g_sched_ctl_create(struct gctl_req *req, struct g_class *mp, int proxy) 1540{ 1541 struct g_provider *pp; 1542 struct g_gsched *gsp; 1543 const char *name; 1544 int i, nargs; 1545 1546 g_topology_assert(); 1547 1548 name = gctl_get_asciiparam(req, "algo"); 1549 if (name == NULL) { 1550 gctl_error(req, "No '%s' argument", "algo"); 1551 return; 1552 } 1553 1554 gsp = g_gsched_find(name); /* also get a reference */ 1555 if (gsp == NULL) { 1556 gctl_error(req, "Bad algorithm '%s'", name); 1557 return; 1558 } 1559 1560 nargs = g_sched_get_nargs(req); 1561 1562 /* 1563 * Run on the arguments, and break on any error. 1564 * We look for a device name, but skip the /dev/ prefix if any. 1565 */ 1566 for (i = 0; i < nargs; i++) { 1567 name = g_sched_argi(req, i); 1568 if (name == NULL) 1569 break; 1570 pp = g_provider_by_name(name); 1571 if (pp == NULL) { 1572 G_SCHED_DEBUG(1, "Provider %s is invalid.", name); 1573 gctl_error(req, "Provider %s is invalid.", name); 1574 break; 1575 } 1576 if (g_sched_create(req, mp, pp, gsp, proxy) != 0) 1577 break; 1578 } 1579 1580 g_gsched_unref(gsp); 1581} 1582 1583static void 1584g_sched_ctl_configure(struct gctl_req *req, struct g_class *mp) 1585{ 1586 struct g_provider *pp; 1587 struct g_gsched *gsp; 1588 const char *name; 1589 int i, nargs; 1590 1591 g_topology_assert(); 1592 1593 name = gctl_get_asciiparam(req, "algo"); 1594 if (name == NULL) { 1595 gctl_error(req, "No '%s' argument", "algo"); 1596 return; 1597 } 1598 1599 gsp = g_gsched_find(name); /* also get a reference */ 1600 if (gsp == NULL) { 1601 gctl_error(req, "Bad algorithm '%s'", name); 1602 return; 1603 } 1604 1605 nargs = g_sched_get_nargs(req); 1606 1607 /* 1608 * Run on the arguments, and break on any error. 1609 * We look for a device name, but skip the /dev/ prefix if any. 1610 */ 1611 for (i = 0; i < nargs; i++) { 1612 name = g_sched_argi(req, i); 1613 if (name == NULL) 1614 break; 1615 pp = g_provider_by_name(name); 1616 if (pp == NULL || pp->geom->class != mp) { 1617 G_SCHED_DEBUG(1, "Provider %s is invalid.", name); 1618 gctl_error(req, "Provider %s is invalid.", name); 1619 break; 1620 } 1621 if (g_sched_change_algo(req, mp, pp, gsp) != 0) 1622 break; 1623 } 1624 1625 g_gsched_unref(gsp); 1626} 1627 1628static struct g_geom * 1629g_sched_find_geom(struct g_class *mp, const char *name) 1630{ 1631 struct g_geom *gp; 1632 1633 LIST_FOREACH(gp, &mp->geom, geom) { 1634 if (strcmp(gp->name, name) == 0) 1635 return (gp); 1636 } 1637 return (NULL); 1638} 1639 1640static void 1641g_sched_ctl_destroy(struct gctl_req *req, struct g_class *mp) 1642{ 1643 int nargs, *force, error, i; 1644 struct g_geom *gp; 1645 const char *name; 1646 1647 g_topology_assert(); 1648 1649 nargs = g_sched_get_nargs(req); 1650 1651 force = gctl_get_paraml(req, "force", sizeof(*force)); 1652 if (force == NULL) { 1653 gctl_error(req, "No 'force' argument"); 1654 return; 1655 } 1656 1657 for (i = 0; i < nargs; i++) { 1658 name = g_sched_argi(req, i); 1659 if (name == NULL) 1660 break; 1661 1662 gp = g_sched_find_geom(mp, name); 1663 if (gp == NULL) { 1664 G_SCHED_DEBUG(1, "Device %s is invalid.", name); 1665 gctl_error(req, "Device %s is invalid.", name); 1666 break; 1667 } 1668 1669 error = g_sched_destroy(gp, *force); 1670 if (error != 0) { 1671 gctl_error(req, "Cannot destroy device %s (error=%d).", 1672 gp->name, error); 1673 break; 1674 } 1675 } 1676} 1677 1678static void 1679g_sched_config(struct gctl_req *req, struct g_class *mp, const char *verb) 1680{ 1681 uint32_t *version; 1682 1683 g_topology_assert(); 1684 1685 version = gctl_get_paraml(req, "version", sizeof(*version)); 1686 if (version == NULL) { 1687 gctl_error(req, "No '%s' argument.", "version"); 1688 return; 1689 } 1690 1691 if (*version != G_SCHED_VERSION) { 1692 gctl_error(req, "Userland and kernel parts are " 1693 "out of sync."); 1694 return; 1695 } 1696 1697 if (strcmp(verb, "create") == 0) { 1698 g_sched_ctl_create(req, mp, 0); 1699 return; 1700 } else if (strcmp(verb, "insert") == 0) { 1701 g_sched_ctl_create(req, mp, 1); 1702 return; 1703 } else if (strcmp(verb, "configure") == 0) { 1704 g_sched_ctl_configure(req, mp); 1705 return; 1706 } else if (strcmp(verb, "destroy") == 0) { 1707 g_sched_ctl_destroy(req, mp); 1708 return; 1709 } 1710 1711 gctl_error(req, "Unknown verb."); 1712} 1713 1714static void 1715g_sched_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, 1716 struct g_consumer *cp, struct g_provider *pp) 1717{ 1718 struct g_sched_softc *sc = gp->softc; 1719 struct g_gsched *gsp = sc->sc_gsched; 1720 if (indent == NULL) { /* plaintext */ 1721 sbuf_printf(sb, " algo %s", gsp ? gsp->gs_name : "--"); 1722 } 1723 if (gsp != NULL && gsp->gs_dumpconf) 1724 gsp->gs_dumpconf(sb, indent, gp, cp, pp); 1725} 1726 1727DECLARE_GEOM_CLASS(g_sched_class, g_sched); 1728MODULE_VERSION(geom_sched, 0); 1729