1/* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * stackglue.c 5 * 6 * Code which implements an OCFS2 specific interface to underlying 7 * cluster stacks. 8 * 9 * Copyright (C) 2007, 2009 Oracle. All rights reserved. 10 * 11 * This program is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU General Public 13 * License as published by the Free Software Foundation, version 2. 14 * 15 * This program is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * General Public License for more details. 19 */ 20 21#include <linux/list.h> 22#include <linux/spinlock.h> 23#include <linux/module.h> 24#include <linux/slab.h> 25#include <linux/kmod.h> 26#include <linux/fs.h> 27#include <linux/kobject.h> 28#include <linux/sysfs.h> 29#include <linux/sysctl.h> 30 31#include "ocfs2_fs.h" 32 33#include "stackglue.h" 34 35#define OCFS2_STACK_PLUGIN_O2CB "o2cb" 36#define OCFS2_STACK_PLUGIN_USER "user" 37#define OCFS2_MAX_HB_CTL_PATH 256 38 39static struct ocfs2_protocol_version locking_max_version; 40static DEFINE_SPINLOCK(ocfs2_stack_lock); 41static LIST_HEAD(ocfs2_stack_list); 42static char cluster_stack_name[OCFS2_STACK_LABEL_LEN + 1]; 43static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; 44 45/* 46 * The stack currently in use. If not null, active_stack->sp_count > 0, 47 * the module is pinned, and the locking protocol cannot be changed. 48 */ 49static struct ocfs2_stack_plugin *active_stack; 50 51static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name) 52{ 53 struct ocfs2_stack_plugin *p; 54 55 assert_spin_locked(&ocfs2_stack_lock); 56 57 list_for_each_entry(p, &ocfs2_stack_list, sp_list) { 58 if (!strcmp(p->sp_name, name)) 59 return p; 60 } 61 62 return NULL; 63} 64 65static int ocfs2_stack_driver_request(const char *stack_name, 66 const char *plugin_name) 67{ 68 int rc; 69 struct ocfs2_stack_plugin *p; 70 71 spin_lock(&ocfs2_stack_lock); 72 73 /* 74 * If the stack passed by the filesystem isn't the selected one, 75 * we can't continue. 76 */ 77 if (strcmp(stack_name, cluster_stack_name)) { 78 rc = -EBUSY; 79 goto out; 80 } 81 82 if (active_stack) { 83 /* 84 * If the active stack isn't the one we want, it cannot 85 * be selected right now. 86 */ 87 if (!strcmp(active_stack->sp_name, plugin_name)) 88 rc = 0; 89 else 90 rc = -EBUSY; 91 goto out; 92 } 93 94 p = ocfs2_stack_lookup(plugin_name); 95 if (!p || !try_module_get(p->sp_owner)) { 96 rc = -ENOENT; 97 goto out; 98 } 99 100 active_stack = p; 101 rc = 0; 102 103out: 104 /* If we found it, pin it */ 105 if (!rc) 106 active_stack->sp_count++; 107 108 spin_unlock(&ocfs2_stack_lock); 109 return rc; 110} 111 112/* 113 * This function looks up the appropriate stack and makes it active. If 114 * there is no stack, it tries to load it. It will fail if the stack still 115 * cannot be found. It will also fail if a different stack is in use. 116 */ 117static int ocfs2_stack_driver_get(const char *stack_name) 118{ 119 int rc; 120 char *plugin_name = OCFS2_STACK_PLUGIN_O2CB; 121 122 /* 123 * Classic stack does not pass in a stack name. This is 124 * compatible with older tools as well. 125 */ 126 if (!stack_name || !*stack_name) 127 stack_name = OCFS2_STACK_PLUGIN_O2CB; 128 129 if (strlen(stack_name) != OCFS2_STACK_LABEL_LEN) { 130 printk(KERN_ERR 131 "ocfs2 passed an invalid cluster stack label: \"%s\"\n", 132 stack_name); 133 return -EINVAL; 134 } 135 136 /* Anything that isn't the classic stack is a user stack */ 137 if (strcmp(stack_name, OCFS2_STACK_PLUGIN_O2CB)) 138 plugin_name = OCFS2_STACK_PLUGIN_USER; 139 140 rc = ocfs2_stack_driver_request(stack_name, plugin_name); 141 if (rc == -ENOENT) { 142 request_module("ocfs2_stack_%s", plugin_name); 143 rc = ocfs2_stack_driver_request(stack_name, plugin_name); 144 } 145 146 if (rc == -ENOENT) { 147 printk(KERN_ERR 148 "ocfs2: Cluster stack driver \"%s\" cannot be found\n", 149 plugin_name); 150 } else if (rc == -EBUSY) { 151 printk(KERN_ERR 152 "ocfs2: A different cluster stack is in use\n"); 153 } 154 155 return rc; 156} 157 158static void ocfs2_stack_driver_put(void) 159{ 160 spin_lock(&ocfs2_stack_lock); 161 BUG_ON(active_stack == NULL); 162 BUG_ON(active_stack->sp_count == 0); 163 164 active_stack->sp_count--; 165 if (!active_stack->sp_count) { 166 module_put(active_stack->sp_owner); 167 active_stack = NULL; 168 } 169 spin_unlock(&ocfs2_stack_lock); 170} 171 172int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin) 173{ 174 int rc; 175 176 spin_lock(&ocfs2_stack_lock); 177 if (!ocfs2_stack_lookup(plugin->sp_name)) { 178 plugin->sp_count = 0; 179 plugin->sp_max_proto = locking_max_version; 180 list_add(&plugin->sp_list, &ocfs2_stack_list); 181 printk(KERN_INFO "ocfs2: Registered cluster interface %s\n", 182 plugin->sp_name); 183 rc = 0; 184 } else { 185 printk(KERN_ERR "ocfs2: Stack \"%s\" already registered\n", 186 plugin->sp_name); 187 rc = -EEXIST; 188 } 189 spin_unlock(&ocfs2_stack_lock); 190 191 return rc; 192} 193EXPORT_SYMBOL_GPL(ocfs2_stack_glue_register); 194 195void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin) 196{ 197 struct ocfs2_stack_plugin *p; 198 199 spin_lock(&ocfs2_stack_lock); 200 p = ocfs2_stack_lookup(plugin->sp_name); 201 if (p) { 202 BUG_ON(p != plugin); 203 BUG_ON(plugin == active_stack); 204 BUG_ON(plugin->sp_count != 0); 205 list_del_init(&plugin->sp_list); 206 printk(KERN_INFO "ocfs2: Unregistered cluster interface %s\n", 207 plugin->sp_name); 208 } else { 209 printk(KERN_ERR "Stack \"%s\" is not registered\n", 210 plugin->sp_name); 211 } 212 spin_unlock(&ocfs2_stack_lock); 213} 214EXPORT_SYMBOL_GPL(ocfs2_stack_glue_unregister); 215 216void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_proto) 217{ 218 struct ocfs2_stack_plugin *p; 219 220 spin_lock(&ocfs2_stack_lock); 221 if (memcmp(max_proto, &locking_max_version, 222 sizeof(struct ocfs2_protocol_version))) { 223 BUG_ON(locking_max_version.pv_major != 0); 224 225 locking_max_version = *max_proto; 226 list_for_each_entry(p, &ocfs2_stack_list, sp_list) { 227 p->sp_max_proto = locking_max_version; 228 } 229 } 230 spin_unlock(&ocfs2_stack_lock); 231} 232EXPORT_SYMBOL_GPL(ocfs2_stack_glue_set_max_proto_version); 233 234 235/* 236 * The ocfs2_dlm_lock() and ocfs2_dlm_unlock() functions take no argument 237 * for the ast and bast functions. They will pass the lksb to the ast 238 * and bast. The caller can wrap the lksb with their own structure to 239 * get more information. 240 */ 241int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn, 242 int mode, 243 struct ocfs2_dlm_lksb *lksb, 244 u32 flags, 245 void *name, 246 unsigned int namelen) 247{ 248 if (!lksb->lksb_conn) 249 lksb->lksb_conn = conn; 250 else 251 BUG_ON(lksb->lksb_conn != conn); 252 return active_stack->sp_ops->dlm_lock(conn, mode, lksb, flags, 253 name, namelen); 254} 255EXPORT_SYMBOL_GPL(ocfs2_dlm_lock); 256 257int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn, 258 struct ocfs2_dlm_lksb *lksb, 259 u32 flags) 260{ 261 BUG_ON(lksb->lksb_conn == NULL); 262 263 return active_stack->sp_ops->dlm_unlock(conn, lksb, flags); 264} 265EXPORT_SYMBOL_GPL(ocfs2_dlm_unlock); 266 267int ocfs2_dlm_lock_status(struct ocfs2_dlm_lksb *lksb) 268{ 269 return active_stack->sp_ops->lock_status(lksb); 270} 271EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status); 272 273int ocfs2_dlm_lvb_valid(struct ocfs2_dlm_lksb *lksb) 274{ 275 return active_stack->sp_ops->lvb_valid(lksb); 276} 277EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb_valid); 278 279void *ocfs2_dlm_lvb(struct ocfs2_dlm_lksb *lksb) 280{ 281 return active_stack->sp_ops->lock_lvb(lksb); 282} 283EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb); 284 285void ocfs2_dlm_dump_lksb(struct ocfs2_dlm_lksb *lksb) 286{ 287 active_stack->sp_ops->dump_lksb(lksb); 288} 289EXPORT_SYMBOL_GPL(ocfs2_dlm_dump_lksb); 290 291int ocfs2_stack_supports_plocks(void) 292{ 293 return active_stack && active_stack->sp_ops->plock; 294} 295EXPORT_SYMBOL_GPL(ocfs2_stack_supports_plocks); 296 297/* 298 * ocfs2_plock() can only be safely called if 299 * ocfs2_stack_supports_plocks() returned true 300 */ 301int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino, 302 struct file *file, int cmd, struct file_lock *fl) 303{ 304 WARN_ON_ONCE(active_stack->sp_ops->plock == NULL); 305 if (active_stack->sp_ops->plock) 306 return active_stack->sp_ops->plock(conn, ino, file, cmd, fl); 307 return -EOPNOTSUPP; 308} 309EXPORT_SYMBOL_GPL(ocfs2_plock); 310 311int ocfs2_cluster_connect(const char *stack_name, 312 const char *group, 313 int grouplen, 314 struct ocfs2_locking_protocol *lproto, 315 void (*recovery_handler)(int node_num, 316 void *recovery_data), 317 void *recovery_data, 318 struct ocfs2_cluster_connection **conn) 319{ 320 int rc = 0; 321 struct ocfs2_cluster_connection *new_conn; 322 323 BUG_ON(group == NULL); 324 BUG_ON(conn == NULL); 325 BUG_ON(recovery_handler == NULL); 326 327 if (grouplen > GROUP_NAME_MAX) { 328 rc = -EINVAL; 329 goto out; 330 } 331 332 if (memcmp(&lproto->lp_max_version, &locking_max_version, 333 sizeof(struct ocfs2_protocol_version))) { 334 rc = -EINVAL; 335 goto out; 336 } 337 338 new_conn = kzalloc(sizeof(struct ocfs2_cluster_connection), 339 GFP_KERNEL); 340 if (!new_conn) { 341 rc = -ENOMEM; 342 goto out; 343 } 344 345 memcpy(new_conn->cc_name, group, grouplen); 346 new_conn->cc_namelen = grouplen; 347 new_conn->cc_recovery_handler = recovery_handler; 348 new_conn->cc_recovery_data = recovery_data; 349 350 new_conn->cc_proto = lproto; 351 /* Start the new connection at our maximum compatibility level */ 352 new_conn->cc_version = lproto->lp_max_version; 353 354 /* This will pin the stack driver if successful */ 355 rc = ocfs2_stack_driver_get(stack_name); 356 if (rc) 357 goto out_free; 358 359 rc = active_stack->sp_ops->connect(new_conn); 360 if (rc) { 361 ocfs2_stack_driver_put(); 362 goto out_free; 363 } 364 365 *conn = new_conn; 366 367out_free: 368 if (rc) 369 kfree(new_conn); 370 371out: 372 return rc; 373} 374EXPORT_SYMBOL_GPL(ocfs2_cluster_connect); 375 376/* The caller will ensure all nodes have the same cluster stack */ 377int ocfs2_cluster_connect_agnostic(const char *group, 378 int grouplen, 379 struct ocfs2_locking_protocol *lproto, 380 void (*recovery_handler)(int node_num, 381 void *recovery_data), 382 void *recovery_data, 383 struct ocfs2_cluster_connection **conn) 384{ 385 char *stack_name = NULL; 386 387 if (cluster_stack_name[0]) 388 stack_name = cluster_stack_name; 389 return ocfs2_cluster_connect(stack_name, group, grouplen, lproto, 390 recovery_handler, recovery_data, conn); 391} 392EXPORT_SYMBOL_GPL(ocfs2_cluster_connect_agnostic); 393 394/* If hangup_pending is 0, the stack driver will be dropped */ 395int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn, 396 int hangup_pending) 397{ 398 int ret; 399 400 BUG_ON(conn == NULL); 401 402 ret = active_stack->sp_ops->disconnect(conn); 403 404 if (!ret) { 405 kfree(conn); 406 if (!hangup_pending) 407 ocfs2_stack_driver_put(); 408 } 409 410 return ret; 411} 412EXPORT_SYMBOL_GPL(ocfs2_cluster_disconnect); 413 414/* 415 * Leave the group for this filesystem. This is executed by a userspace 416 * program (stored in ocfs2_hb_ctl_path). 417 */ 418static void ocfs2_leave_group(const char *group) 419{ 420 int ret; 421 char *argv[5], *envp[3]; 422 423 argv[0] = ocfs2_hb_ctl_path; 424 argv[1] = "-K"; 425 argv[2] = "-u"; 426 argv[3] = (char *)group; 427 argv[4] = NULL; 428 429 /* minimal command environment taken from cpu_run_sbin_hotplug */ 430 envp[0] = "HOME=/"; 431 envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; 432 envp[2] = NULL; 433 434 ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); 435 if (ret < 0) { 436 printk(KERN_ERR 437 "ocfs2: Error %d running user helper " 438 "\"%s %s %s %s\"\n", 439 ret, argv[0], argv[1], argv[2], argv[3]); 440 } 441} 442 443/* 444 * Hangup is a required post-umount. ocfs2-tools software expects the 445 * filesystem to call "ocfs2_hb_ctl" during unmount. This happens 446 * regardless of whether the DLM got started, so we can't do it 447 * in ocfs2_cluster_disconnect(). The ocfs2_leave_group() function does 448 * the actual work. 449 */ 450void ocfs2_cluster_hangup(const char *group, int grouplen) 451{ 452 BUG_ON(group == NULL); 453 BUG_ON(group[grouplen] != '\0'); 454 455 ocfs2_leave_group(group); 456 457 /* cluster_disconnect() was called with hangup_pending==1 */ 458 ocfs2_stack_driver_put(); 459} 460EXPORT_SYMBOL_GPL(ocfs2_cluster_hangup); 461 462int ocfs2_cluster_this_node(unsigned int *node) 463{ 464 return active_stack->sp_ops->this_node(node); 465} 466EXPORT_SYMBOL_GPL(ocfs2_cluster_this_node); 467 468 469/* 470 * Sysfs bits 471 */ 472 473static ssize_t ocfs2_max_locking_protocol_show(struct kobject *kobj, 474 struct kobj_attribute *attr, 475 char *buf) 476{ 477 ssize_t ret = 0; 478 479 spin_lock(&ocfs2_stack_lock); 480 if (locking_max_version.pv_major) 481 ret = snprintf(buf, PAGE_SIZE, "%u.%u\n", 482 locking_max_version.pv_major, 483 locking_max_version.pv_minor); 484 spin_unlock(&ocfs2_stack_lock); 485 486 return ret; 487} 488 489static struct kobj_attribute ocfs2_attr_max_locking_protocol = 490 __ATTR(max_locking_protocol, S_IFREG | S_IRUGO, 491 ocfs2_max_locking_protocol_show, NULL); 492 493static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj, 494 struct kobj_attribute *attr, 495 char *buf) 496{ 497 ssize_t ret = 0, total = 0, remain = PAGE_SIZE; 498 struct ocfs2_stack_plugin *p; 499 500 spin_lock(&ocfs2_stack_lock); 501 list_for_each_entry(p, &ocfs2_stack_list, sp_list) { 502 ret = snprintf(buf, remain, "%s\n", 503 p->sp_name); 504 if (ret < 0) { 505 total = ret; 506 break; 507 } 508 if (ret == remain) { 509 /* snprintf() didn't fit */ 510 total = -E2BIG; 511 break; 512 } 513 total += ret; 514 remain -= ret; 515 } 516 spin_unlock(&ocfs2_stack_lock); 517 518 return total; 519} 520 521static struct kobj_attribute ocfs2_attr_loaded_cluster_plugins = 522 __ATTR(loaded_cluster_plugins, S_IFREG | S_IRUGO, 523 ocfs2_loaded_cluster_plugins_show, NULL); 524 525static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj, 526 struct kobj_attribute *attr, 527 char *buf) 528{ 529 ssize_t ret = 0; 530 531 spin_lock(&ocfs2_stack_lock); 532 if (active_stack) { 533 ret = snprintf(buf, PAGE_SIZE, "%s\n", 534 active_stack->sp_name); 535 if (ret == PAGE_SIZE) 536 ret = -E2BIG; 537 } 538 spin_unlock(&ocfs2_stack_lock); 539 540 return ret; 541} 542 543static struct kobj_attribute ocfs2_attr_active_cluster_plugin = 544 __ATTR(active_cluster_plugin, S_IFREG | S_IRUGO, 545 ocfs2_active_cluster_plugin_show, NULL); 546 547static ssize_t ocfs2_cluster_stack_show(struct kobject *kobj, 548 struct kobj_attribute *attr, 549 char *buf) 550{ 551 ssize_t ret; 552 spin_lock(&ocfs2_stack_lock); 553 ret = snprintf(buf, PAGE_SIZE, "%s\n", cluster_stack_name); 554 spin_unlock(&ocfs2_stack_lock); 555 556 return ret; 557} 558 559static ssize_t ocfs2_cluster_stack_store(struct kobject *kobj, 560 struct kobj_attribute *attr, 561 const char *buf, size_t count) 562{ 563 size_t len = count; 564 ssize_t ret; 565 566 if (len == 0) 567 return len; 568 569 if (buf[len - 1] == '\n') 570 len--; 571 572 if ((len != OCFS2_STACK_LABEL_LEN) || 573 (strnlen(buf, len) != len)) 574 return -EINVAL; 575 576 spin_lock(&ocfs2_stack_lock); 577 if (active_stack) { 578 if (!strncmp(buf, cluster_stack_name, len)) 579 ret = count; 580 else 581 ret = -EBUSY; 582 } else { 583 memcpy(cluster_stack_name, buf, len); 584 ret = count; 585 } 586 spin_unlock(&ocfs2_stack_lock); 587 588 return ret; 589} 590 591 592static struct kobj_attribute ocfs2_attr_cluster_stack = 593 __ATTR(cluster_stack, S_IFREG | S_IRUGO | S_IWUSR, 594 ocfs2_cluster_stack_show, 595 ocfs2_cluster_stack_store); 596 597static struct attribute *ocfs2_attrs[] = { 598 &ocfs2_attr_max_locking_protocol.attr, 599 &ocfs2_attr_loaded_cluster_plugins.attr, 600 &ocfs2_attr_active_cluster_plugin.attr, 601 &ocfs2_attr_cluster_stack.attr, 602 NULL, 603}; 604 605static struct attribute_group ocfs2_attr_group = { 606 .attrs = ocfs2_attrs, 607}; 608 609static struct kset *ocfs2_kset; 610 611static void ocfs2_sysfs_exit(void) 612{ 613 kset_unregister(ocfs2_kset); 614} 615 616static int ocfs2_sysfs_init(void) 617{ 618 int ret; 619 620 ocfs2_kset = kset_create_and_add("ocfs2", NULL, fs_kobj); 621 if (!ocfs2_kset) 622 return -ENOMEM; 623 624 ret = sysfs_create_group(&ocfs2_kset->kobj, &ocfs2_attr_group); 625 if (ret) 626 goto error; 627 628 return 0; 629 630error: 631 kset_unregister(ocfs2_kset); 632 return ret; 633} 634 635/* 636 * Sysctl bits 637 * 638 * The sysctl lives at /proc/sys/fs/ocfs2/nm/hb_ctl_path. The 'nm' doesn't 639 * make as much sense in a multiple cluster stack world, but it's safer 640 * and easier to preserve the name. 641 */ 642 643#define FS_OCFS2_NM 1 644 645static ctl_table ocfs2_nm_table[] = { 646 { 647 .procname = "hb_ctl_path", 648 .data = ocfs2_hb_ctl_path, 649 .maxlen = OCFS2_MAX_HB_CTL_PATH, 650 .mode = 0644, 651 .proc_handler = proc_dostring, 652 }, 653 { } 654}; 655 656static ctl_table ocfs2_mod_table[] = { 657 { 658 .procname = "nm", 659 .data = NULL, 660 .maxlen = 0, 661 .mode = 0555, 662 .child = ocfs2_nm_table 663 }, 664 { } 665}; 666 667static ctl_table ocfs2_kern_table[] = { 668 { 669 .procname = "ocfs2", 670 .data = NULL, 671 .maxlen = 0, 672 .mode = 0555, 673 .child = ocfs2_mod_table 674 }, 675 { } 676}; 677 678static ctl_table ocfs2_root_table[] = { 679 { 680 .procname = "fs", 681 .data = NULL, 682 .maxlen = 0, 683 .mode = 0555, 684 .child = ocfs2_kern_table 685 }, 686 { } 687}; 688 689static struct ctl_table_header *ocfs2_table_header = NULL; 690 691 692/* 693 * Initialization 694 */ 695 696static int __init ocfs2_stack_glue_init(void) 697{ 698 strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB); 699 700 ocfs2_table_header = register_sysctl_table(ocfs2_root_table); 701 if (!ocfs2_table_header) { 702 printk(KERN_ERR 703 "ocfs2 stack glue: unable to register sysctl\n"); 704 return -ENOMEM; /* or something. */ 705 } 706 707 return ocfs2_sysfs_init(); 708} 709 710static void __exit ocfs2_stack_glue_exit(void) 711{ 712 memset(&locking_max_version, 0, 713 sizeof(struct ocfs2_protocol_version)); 714 locking_max_version.pv_major = 0; 715 locking_max_version.pv_minor = 0; 716 ocfs2_sysfs_exit(); 717 if (ocfs2_table_header) 718 unregister_sysctl_table(ocfs2_table_header); 719} 720 721MODULE_AUTHOR("Oracle"); 722MODULE_DESCRIPTION("ocfs2 cluter stack glue layer"); 723MODULE_LICENSE("GPL"); 724module_init(ocfs2_stack_glue_init); 725module_exit(ocfs2_stack_glue_exit); 726