geom_vinum_init.c revision 135966
1/*- 2 * Copyright (c) 2004 Lukas Ertl 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27#include <sys/cdefs.h> 28__FBSDID("$FreeBSD: head/sys/geom/vinum/geom_vinum_init.c 135966 2004-09-30 12:57:35Z le $"); 29 30#include <sys/param.h> 31#include <sys/bio.h> 32#include <sys/kernel.h> 33#include <sys/kthread.h> 34#include <sys/libkern.h> 35#include <sys/malloc.h> 36#include <sys/queue.h> 37 38#include <geom/geom.h> 39#include <geom/vinum/geom_vinum_var.h> 40#include <geom/vinum/geom_vinum.h> 41#include <geom/vinum/geom_vinum_share.h> 42 43int gv_init_plex(struct gv_plex *); 44int gv_init_sd(struct gv_sd *); 45void gv_init_td(void *); 46void gv_rebuild_plex(struct gv_plex *); 47void gv_rebuild_td(void *); 48void gv_start_plex(struct gv_plex *); 49void gv_start_vol(struct gv_volume *); 50void gv_sync(struct gv_volume *); 51void gv_sync_td(void *); 52 53struct gv_sync_args { 54 struct gv_volume *v; 55 struct gv_plex *from; 56 struct gv_plex *to; 57 off_t syncsize; 58}; 59 60void 61gv_start_obj(struct g_geom *gp, struct gctl_req *req) 62{ 63 struct gv_softc *sc; 64 struct gv_volume *v; 65 struct gv_plex *p; 66 int *argc, *initsize; 67 char *argv, buf[20]; 68 int i, type; 69 70 argc = gctl_get_paraml(req, "argc", sizeof(*argc)); 71 initsize = gctl_get_paraml(req, "initsize", sizeof(*initsize)); 72 73 if (argc == NULL || *argc == 0) { 74 gctl_error(req, "no arguments given"); 75 return; 76 } 77 78 sc = gp->softc; 79 80 for (i = 0; i < *argc; i++) { 81 snprintf(buf, sizeof(buf), "argv%d", i); 82 argv = gctl_get_param(req, buf, NULL); 83 if (argv == NULL) 84 continue; 85 type = gv_object_type(sc, argv); 86 switch (type) { 87 case GV_TYPE_VOL: 88 v = gv_find_vol(sc, argv); 89 gv_start_vol(v); 90 break; 91 92 case GV_TYPE_PLEX: 93 p = gv_find_plex(sc, argv); 94 gv_start_plex(p); 95 break; 96 97 case GV_TYPE_SD: 98 case GV_TYPE_DRIVE: 99 /* XXX not yet */ 100 gctl_error(req, "cannot start '%s'", argv); 101 return; 102 default: 103 gctl_error(req, "unknown object '%s'", argv); 104 return; 105 } 106 } 107} 108 109void 110gv_start_plex(struct gv_plex *p) 111{ 112 struct gv_volume *v; 113 114 KASSERT(p != NULL, ("gv_start_plex: NULL p")); 115 116 if (p->state == GV_PLEX_UP) 117 return; 118 119 v = p->vol_sc; 120 if ((v != NULL) && (v->plexcount > 1)) 121 gv_sync(v); 122 else if (p->org == GV_PLEX_RAID5) { 123 if (p->state == GV_PLEX_DEGRADED) 124 gv_rebuild_plex(p); 125 else 126 gv_init_plex(p); 127 } 128 129 return; 130} 131 132void 133gv_start_vol(struct gv_volume *v) 134{ 135 struct gv_plex *p; 136 struct gv_sd *s; 137 138 KASSERT(v != NULL, ("gv_start_vol: NULL v")); 139 140 if (v->plexcount == 0) 141 return; 142 143 else if (v->plexcount == 1) { 144 p = LIST_FIRST(&v->plexes); 145 KASSERT(p != NULL, ("gv_start_vol: NULL p on %s", v->name)); 146 if (p->org == GV_PLEX_RAID5) { 147 switch (p->state) { 148 case GV_PLEX_DOWN: 149 gv_init_plex(p); 150 break; 151 case GV_PLEX_DEGRADED: 152 gv_rebuild_plex(p); 153 break; 154 default: 155 return; 156 } 157 } else { 158 LIST_FOREACH(s, &p->subdisks, in_plex) { 159 gv_set_sd_state(s, GV_SD_UP, 160 GV_SETSTATE_CONFIG); 161 } 162 } 163 } else 164 gv_sync(v); 165} 166 167void 168gv_sync(struct gv_volume *v) 169{ 170 struct gv_softc *sc; 171 struct gv_plex *p, *up; 172 struct gv_sync_args *sync; 173 174 KASSERT(v != NULL, ("gv_sync: NULL v")); 175 sc = v->vinumconf; 176 KASSERT(sc != NULL, ("gv_sync: NULL sc on %s", v->name)); 177 178 /* Find the plex that's up. */ 179 up = NULL; 180 LIST_FOREACH(up, &v->plexes, in_volume) { 181 if (up->state == GV_PLEX_UP) 182 break; 183 } 184 185 /* Didn't find a good plex. */ 186 if (up == NULL) 187 return; 188 189 LIST_FOREACH(p, &v->plexes, in_volume) { 190 if ((p == up) || (p->state == GV_PLEX_UP)) 191 continue; 192 sync = g_malloc(sizeof(*sync), M_WAITOK | M_ZERO); 193 sync->v = v; 194 sync->from = up; 195 sync->to = p; 196 sync->syncsize = GV_DFLT_SYNCSIZE; 197 kthread_create(gv_sync_td, sync, NULL, 0, 0, "sync_p '%s'", 198 p->name); 199 } 200} 201 202void 203gv_rebuild_plex(struct gv_plex *p) 204{ 205 struct gv_sync_args *sync; 206 207 if ((p->flags & GV_PLEX_SYNCING) || gv_is_open(p->geom)) 208 return; 209 210 sync = g_malloc(sizeof(*sync), M_WAITOK | M_ZERO); 211 sync->to = p; 212 sync->syncsize = GV_DFLT_SYNCSIZE; 213 214 kthread_create(gv_rebuild_td, sync, NULL, 0, 0, "gv_rebuild %s", 215 p->name); 216} 217 218int 219gv_init_plex(struct gv_plex *p) 220{ 221 struct gv_sd *s; 222 int err; 223 224 KASSERT(p != NULL, ("gv_init_plex: NULL p")); 225 226 LIST_FOREACH(s, &p->subdisks, in_plex) { 227 err = gv_init_sd(s); 228 if (err) 229 return (err); 230 } 231 232 return (0); 233} 234 235int 236gv_init_sd(struct gv_sd *s) 237{ 238 KASSERT(s != NULL, ("gv_init_sd: NULL s")); 239 240 if (gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE)) 241 return (-1); 242 243 s->init_size = GV_DFLT_SYNCSIZE; 244 s->flags &= ~GV_SD_INITCANCEL; 245 246 /* Spawn the thread that does the work for us. */ 247 kthread_create(gv_init_td, s, NULL, 0, 0, "init_sd %s", s->name); 248 249 return (0); 250} 251 252/* This thread is responsible for rebuilding a degraded RAID5 plex. */ 253void 254gv_rebuild_td(void *arg) 255{ 256 struct bio *bp; 257 struct gv_plex *p; 258 struct g_consumer *cp; 259 struct gv_sync_args *sync; 260 u_char *buf; 261 off_t i; 262 int error; 263 264 buf = NULL; 265 bp = NULL; 266 267 sync = arg; 268 p = sync->to; 269 p->synced = 0; 270 p->flags |= GV_PLEX_SYNCING; 271 cp = p->consumer; 272 273 g_topology_lock(); 274 error = g_access(cp, 1, 1, 0); 275 if (error) { 276 g_topology_unlock(); 277 printf("GEOM_VINUM: rebuild of %s failed to access consumer: " 278 "%d\n", p->name, error); 279 kthread_exit(error); 280 } 281 g_topology_unlock(); 282 283 buf = g_malloc(sync->syncsize, M_WAITOK); 284 285 printf("GEOM_VINUM: rebuild of %s started\n", p->name); 286 i = 0; 287 for (i = 0; i < p->size; i += (p->stripesize * (p->sdcount - 1))) { 288/* 289 if (i + sync->syncsize > p->size) 290 sync->syncsize = p->size - i; 291*/ 292 bp = g_new_bio(); 293 if (bp == NULL) { 294 printf("GEOM_VINUM: rebuild of %s failed creating bio: " 295 "out of memory\n", p->name); 296 break; 297 } 298 bp->bio_cmd = BIO_WRITE; 299 bp->bio_done = NULL; 300 bp->bio_data = buf; 301 bp->bio_cflags |= GV_BIO_REBUILD; 302 bp->bio_offset = i; 303 bp->bio_length = p->stripesize; 304 305 /* Schedule it down ... */ 306 g_io_request(bp, cp); 307 308 /* ... and wait for the result. */ 309 error = biowait(bp, "gwrite"); 310 if (error) { 311 printf("GEOM_VINUM: rebuild of %s failed at offset %jd " 312 "errno: %d\n", p->name, i, error); 313 break; 314 } 315 g_destroy_bio(bp); 316 bp = NULL; 317 } 318 319 if (bp != NULL) 320 g_destroy_bio(bp); 321 if (buf != NULL) 322 g_free(buf); 323 324 g_topology_lock(); 325 g_access(cp, -1, -1, 0); 326 gv_save_config_all(p->vinumconf); 327 g_topology_unlock(); 328 329 p->flags &= ~GV_PLEX_SYNCING; 330 p->synced = 0; 331 332 /* Successful initialization. */ 333 if (!error) 334 printf("GEOM_VINUM: rebuild of %s finished\n", p->name); 335 336 g_free(sync); 337 kthread_exit(error); 338} 339 340void 341gv_sync_td(void *arg) 342{ 343 struct bio *bp; 344 struct gv_plex *p; 345 struct g_consumer *from, *to; 346 struct gv_sync_args *sync; 347 u_char *buf; 348 off_t i; 349 int error; 350 351 sync = arg; 352 353 from = sync->from->consumer; 354 to = sync->to->consumer; 355 356 p = sync->to; 357 p->synced = 0; 358 p->flags |= GV_PLEX_SYNCING; 359 360 error = 0; 361 362 g_topology_lock(); 363 error = g_access(from, 1, 0, 0); 364 if (error) { 365 g_topology_unlock(); 366 printf("gvinum: sync from '%s' failed to access consumer: %d\n", 367 sync->from->name, error); 368 kthread_exit(error); 369 } 370 error = g_access(to, 0, 1, 0); 371 if (error) { 372 g_access(from, -1, 0, 0); 373 g_topology_unlock(); 374 printf("gvinum: sync to '%s' failed to access consumer: %d\n", 375 p->name, error); 376 kthread_exit(error); 377 } 378 g_topology_unlock(); 379 380 printf("GEOM_VINUM: plex sync %s -> %s started\n", sync->from->name, 381 sync->to->name); 382 for (i = 0; i < p->size; i+= sync->syncsize) { 383 /* Read some bits from the good plex. */ 384 buf = g_read_data(from, i, sync->syncsize, &error); 385 if (buf == NULL) { 386 printf("gvinum: sync read from '%s' failed at offset " 387 "%jd, errno: %d\n", sync->from->name, i, error); 388 break; 389 } 390 391 /* 392 * Create a bio and schedule it down on the 'bad' plex. We 393 * cannot simply use g_write_data() because we have to let the 394 * lower parts know that we are an initialization process and 395 * not a 'normal' request. 396 */ 397 bp = g_new_bio(); 398 if (bp == NULL) { 399 printf("gvinum: sync write to '%s' failed at offset " 400 "%jd, out of memory\n", p->name, i); 401 g_free(buf); 402 break; 403 } 404 bp->bio_cmd = BIO_WRITE; 405 bp->bio_offset = i; 406 bp->bio_length = sync->syncsize; 407 bp->bio_data = buf; 408 bp->bio_done = NULL; 409 410 /* 411 * This hack declare this bio as part of an initialization 412 * process, so that the lower levels allow it to get through. 413 */ 414 bp->bio_cflags |= GV_BIO_SYNCREQ; 415 416 /* Schedule it down ... */ 417 g_io_request(bp, to); 418 419 /* ... and wait for the result. */ 420 error = biowait(bp, "gwrite"); 421 g_destroy_bio(bp); 422 g_free(buf); 423 if (error) { 424 printf("gvinum: sync write to '%s' failed at offset " 425 "%jd, errno: %d\n", p->name, i, error); 426 break; 427 } 428 429 /* Note that we have synced a little bit more. */ 430 p->synced += sync->syncsize; 431 } 432 433 g_topology_lock(); 434 g_access(from, -1, 0, 0); 435 g_access(to, 0, -1, 0); 436 gv_save_config_all(p->vinumconf); 437 g_topology_unlock(); 438 439 /* Successful initialization. */ 440 if (!error) { 441 p->flags &= ~GV_PLEX_SYNCING; 442 printf("GEOM_VINUM: plex sync %s -> %s finished\n", 443 sync->from->name, sync->to->name); 444 } 445 446 g_free(sync); 447 kthread_exit(error); 448} 449 450void 451gv_init_td(void *arg) 452{ 453 struct gv_sd *s; 454 struct gv_drive *d; 455 struct g_geom *gp; 456 struct g_consumer *cp; 457 int error; 458 off_t i, init_size, start, offset, length; 459 u_char *buf; 460 461 s = arg; 462 KASSERT(s != NULL, ("gv_init_td: NULL s")); 463 d = s->drive_sc; 464 KASSERT(d != NULL, ("gv_init_td: NULL d")); 465 gp = d->geom; 466 KASSERT(gp != NULL, ("gv_init_td: NULL gp")); 467 468 cp = LIST_FIRST(&gp->consumer); 469 KASSERT(cp != NULL, ("gv_init_td: NULL cp")); 470 471 s->init_error = 0; 472 init_size = s->init_size; 473 start = s->drive_offset + s->initialized; 474 offset = s->drive_offset; 475 length = s->size; 476 477 buf = g_malloc(s->init_size, M_WAITOK | M_ZERO); 478 479 g_topology_lock(); 480 error = g_access(cp, 0, 1, 0); 481 if (error) { 482 s->init_error = error; 483 g_topology_unlock(); 484 printf("geom_vinum: init '%s' failed to access consumer: %d\n", 485 s->name, error); 486 kthread_exit(error); 487 } 488 g_topology_unlock(); 489 490 for (i = start; i < offset + length; i += init_size) { 491 if (s->flags & GV_SD_INITCANCEL) { 492 printf("geom_vinum: subdisk '%s' init: cancelled at" 493 " offset %jd (drive offset %jd)\n", s->name, 494 (intmax_t)s->initialized, (intmax_t)i); 495 error = EAGAIN; 496 break; 497 } 498 error = g_write_data(cp, i, buf, init_size); 499 if (error) { 500 printf("geom_vinum: subdisk '%s' init: write failed" 501 " at offset %jd (drive offset %jd)\n", s->name, 502 (intmax_t)s->initialized, (intmax_t)i); 503 break; 504 } 505 s->initialized += init_size; 506 } 507 508 g_free(buf); 509 510 g_topology_lock(); 511 g_access(cp, 0, -1, 0); 512 g_topology_unlock(); 513 if (error) { 514 s->init_error = error; 515 g_topology_lock(); 516 gv_set_sd_state(s, GV_SD_STALE, 517 GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG); 518 g_topology_unlock(); 519 } else { 520 g_topology_lock(); 521 gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_CONFIG); 522 g_topology_unlock(); 523 s->initialized = 0; 524 printf("geom_vinum: init '%s' finished\n", s->name); 525 } 526 kthread_exit(error); 527} 528