1/* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * stack_user.c 5 * 6 * Code which interfaces ocfs2 with fs/dlm and a userspace stack. 7 * 8 * Copyright (C) 2007 Oracle. All rights reserved. 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public 12 * License as published by the Free Software Foundation, version 2. 13 * 14 * This program is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * General Public License for more details. 18 */ 19 20#include <linux/module.h> 21#include <linux/fs.h> 22#include <linux/miscdevice.h> 23#include <linux/mutex.h> 24#include <linux/slab.h> 25#include <linux/smp_lock.h> 26#include <linux/reboot.h> 27#include <asm/uaccess.h> 28 29#include "stackglue.h" 30 31#include <linux/dlm_plock.h> 32 33/* 34 * The control protocol starts with a handshake. Until the handshake 35 * is complete, the control device will fail all write(2)s. 36 * 37 * The handshake is simple. First, the client reads until EOF. Each line 38 * of output is a supported protocol tag. All protocol tags are a single 39 * character followed by a two hex digit version number. Currently the 40 * only things supported is T01, for "Text-base version 0x01". Next, the 41 * client writes the version they would like to use, including the newline. 42 * Thus, the protocol tag is 'T01\n'. If the version tag written is 43 * unknown, -EINVAL is returned. Once the negotiation is complete, the 44 * client can start sending messages. 45 * 46 * The T01 protocol has three messages. First is the "SETN" message. 47 * It has the following syntax: 48 * 49 * SETN<space><8-char-hex-nodenum><newline> 50 * 51 * This is 14 characters. 52 * 53 * The "SETN" message must be the first message following the protocol. 54 * It tells ocfs2_control the local node number. 55 * 56 * Next comes the "SETV" message. It has the following syntax: 57 * 58 * SETV<space><2-char-hex-major><space><2-char-hex-minor><newline> 59 * 60 * This is 11 characters. 61 * 62 * The "SETV" message sets the filesystem locking protocol version as 63 * negotiated by the client. The client negotiates based on the maximum 64 * version advertised in /sys/fs/ocfs2/max_locking_protocol. The major 65 * number from the "SETV" message must match 66 * ocfs2_user_plugin.sp_max_proto.pv_major, and the minor number 67 * must be less than or equal to ...sp_max_version.pv_minor. 68 * 69 * Once this information has been set, mounts will be allowed. From this 70 * point on, the "DOWN" message can be sent for node down notification. 71 * It has the following syntax: 72 * 73 * DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline> 74 * 75 * eg: 76 * 77 * DOWN 632A924FDD844190BDA93C0DF6B94899 00000001\n 78 * 79 * This is 47 characters. 80 */ 81 82/* 83 * Whether or not the client has done the handshake. 84 * For now, we have just one protocol version. 85 */ 86#define OCFS2_CONTROL_PROTO "T01\n" 87#define OCFS2_CONTROL_PROTO_LEN 4 88 89/* Handshake states */ 90#define OCFS2_CONTROL_HANDSHAKE_INVALID (0) 91#define OCFS2_CONTROL_HANDSHAKE_READ (1) 92#define OCFS2_CONTROL_HANDSHAKE_PROTOCOL (2) 93#define OCFS2_CONTROL_HANDSHAKE_VALID (3) 94 95/* Messages */ 96#define OCFS2_CONTROL_MESSAGE_OP_LEN 4 97#define OCFS2_CONTROL_MESSAGE_SETNODE_OP "SETN" 98#define OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN 14 99#define OCFS2_CONTROL_MESSAGE_SETVERSION_OP "SETV" 100#define OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN 11 101#define OCFS2_CONTROL_MESSAGE_DOWN_OP "DOWN" 102#define OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN 47 103#define OCFS2_TEXT_UUID_LEN 32 104#define OCFS2_CONTROL_MESSAGE_VERNUM_LEN 2 105#define OCFS2_CONTROL_MESSAGE_NODENUM_LEN 8 106 107/* 108 * ocfs2_live_connection is refcounted because the filesystem and 109 * miscdevice sides can detach in different order. Let's just be safe. 110 */ 111struct ocfs2_live_connection { 112 struct list_head oc_list; 113 struct ocfs2_cluster_connection *oc_conn; 114}; 115 116struct ocfs2_control_private { 117 struct list_head op_list; 118 int op_state; 119 int op_this_node; 120 struct ocfs2_protocol_version op_proto; 121}; 122 123/* SETN<space><8-char-hex-nodenum><newline> */ 124struct ocfs2_control_message_setn { 125 char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; 126 char space; 127 char nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN]; 128 char newline; 129}; 130 131/* SETV<space><2-char-hex-major><space><2-char-hex-minor><newline> */ 132struct ocfs2_control_message_setv { 133 char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; 134 char space1; 135 char major[OCFS2_CONTROL_MESSAGE_VERNUM_LEN]; 136 char space2; 137 char minor[OCFS2_CONTROL_MESSAGE_VERNUM_LEN]; 138 char newline; 139}; 140 141/* DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline> */ 142struct ocfs2_control_message_down { 143 char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; 144 char space1; 145 char uuid[OCFS2_TEXT_UUID_LEN]; 146 char space2; 147 char nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN]; 148 char newline; 149}; 150 151union ocfs2_control_message { 152 char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; 153 struct ocfs2_control_message_setn u_setn; 154 struct ocfs2_control_message_setv u_setv; 155 struct ocfs2_control_message_down u_down; 156}; 157 158static struct ocfs2_stack_plugin ocfs2_user_plugin; 159 160static atomic_t ocfs2_control_opened; 161static int ocfs2_control_this_node = -1; 162static struct ocfs2_protocol_version running_proto; 163 164static LIST_HEAD(ocfs2_live_connection_list); 165static LIST_HEAD(ocfs2_control_private_list); 166static DEFINE_MUTEX(ocfs2_control_lock); 167 168static inline void ocfs2_control_set_handshake_state(struct file *file, 169 int state) 170{ 171 struct ocfs2_control_private *p = file->private_data; 172 p->op_state = state; 173} 174 175static inline int ocfs2_control_get_handshake_state(struct file *file) 176{ 177 struct ocfs2_control_private *p = file->private_data; 178 return p->op_state; 179} 180 181static struct ocfs2_live_connection *ocfs2_connection_find(const char *name) 182{ 183 size_t len = strlen(name); 184 struct ocfs2_live_connection *c; 185 186 BUG_ON(!mutex_is_locked(&ocfs2_control_lock)); 187 188 list_for_each_entry(c, &ocfs2_live_connection_list, oc_list) { 189 if ((c->oc_conn->cc_namelen == len) && 190 !strncmp(c->oc_conn->cc_name, name, len)) 191 return c; 192 } 193 194 return c; 195} 196 197/* 198 * ocfs2_live_connection structures are created underneath the ocfs2 199 * mount path. Since the VFS prevents multiple calls to 200 * fill_super(), we can't get dupes here. 201 */ 202static int ocfs2_live_connection_new(struct ocfs2_cluster_connection *conn, 203 struct ocfs2_live_connection **c_ret) 204{ 205 int rc = 0; 206 struct ocfs2_live_connection *c; 207 208 c = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL); 209 if (!c) 210 return -ENOMEM; 211 212 mutex_lock(&ocfs2_control_lock); 213 c->oc_conn = conn; 214 215 if (atomic_read(&ocfs2_control_opened)) 216 list_add(&c->oc_list, &ocfs2_live_connection_list); 217 else { 218 printk(KERN_ERR 219 "ocfs2: Userspace control daemon is not present\n"); 220 rc = -ESRCH; 221 } 222 223 mutex_unlock(&ocfs2_control_lock); 224 225 if (!rc) 226 *c_ret = c; 227 else 228 kfree(c); 229 230 return rc; 231} 232 233/* 234 * This function disconnects the cluster connection from ocfs2_control. 235 * Afterwards, userspace can't affect the cluster connection. 236 */ 237static void ocfs2_live_connection_drop(struct ocfs2_live_connection *c) 238{ 239 mutex_lock(&ocfs2_control_lock); 240 list_del_init(&c->oc_list); 241 c->oc_conn = NULL; 242 mutex_unlock(&ocfs2_control_lock); 243 244 kfree(c); 245} 246 247static int ocfs2_control_cfu(void *target, size_t target_len, 248 const char __user *buf, size_t count) 249{ 250 /* The T01 expects write(2) calls to have exactly one command */ 251 if ((count != target_len) || 252 (count > sizeof(union ocfs2_control_message))) 253 return -EINVAL; 254 255 if (copy_from_user(target, buf, target_len)) 256 return -EFAULT; 257 258 return 0; 259} 260 261static ssize_t ocfs2_control_validate_protocol(struct file *file, 262 const char __user *buf, 263 size_t count) 264{ 265 ssize_t ret; 266 char kbuf[OCFS2_CONTROL_PROTO_LEN]; 267 268 ret = ocfs2_control_cfu(kbuf, OCFS2_CONTROL_PROTO_LEN, 269 buf, count); 270 if (ret) 271 return ret; 272 273 if (strncmp(kbuf, OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN)) 274 return -EINVAL; 275 276 ocfs2_control_set_handshake_state(file, 277 OCFS2_CONTROL_HANDSHAKE_PROTOCOL); 278 279 return count; 280} 281 282static void ocfs2_control_send_down(const char *uuid, 283 int nodenum) 284{ 285 struct ocfs2_live_connection *c; 286 287 mutex_lock(&ocfs2_control_lock); 288 289 c = ocfs2_connection_find(uuid); 290 if (c) { 291 BUG_ON(c->oc_conn == NULL); 292 c->oc_conn->cc_recovery_handler(nodenum, 293 c->oc_conn->cc_recovery_data); 294 } 295 296 mutex_unlock(&ocfs2_control_lock); 297} 298 299/* 300 * Called whenever configuration elements are sent to /dev/ocfs2_control. 301 * If all configuration elements are present, try to set the global 302 * values. If there is a problem, return an error. Skip any missing 303 * elements, and only bump ocfs2_control_opened when we have all elements 304 * and are successful. 305 */ 306static int ocfs2_control_install_private(struct file *file) 307{ 308 int rc = 0; 309 int set_p = 1; 310 struct ocfs2_control_private *p = file->private_data; 311 312 BUG_ON(p->op_state != OCFS2_CONTROL_HANDSHAKE_PROTOCOL); 313 314 mutex_lock(&ocfs2_control_lock); 315 316 if (p->op_this_node < 0) { 317 set_p = 0; 318 } else if ((ocfs2_control_this_node >= 0) && 319 (ocfs2_control_this_node != p->op_this_node)) { 320 rc = -EINVAL; 321 goto out_unlock; 322 } 323 324 if (!p->op_proto.pv_major) { 325 set_p = 0; 326 } else if (!list_empty(&ocfs2_live_connection_list) && 327 ((running_proto.pv_major != p->op_proto.pv_major) || 328 (running_proto.pv_minor != p->op_proto.pv_minor))) { 329 rc = -EINVAL; 330 goto out_unlock; 331 } 332 333 if (set_p) { 334 ocfs2_control_this_node = p->op_this_node; 335 running_proto.pv_major = p->op_proto.pv_major; 336 running_proto.pv_minor = p->op_proto.pv_minor; 337 } 338 339out_unlock: 340 mutex_unlock(&ocfs2_control_lock); 341 342 if (!rc && set_p) { 343 /* We set the global values successfully */ 344 atomic_inc(&ocfs2_control_opened); 345 ocfs2_control_set_handshake_state(file, 346 OCFS2_CONTROL_HANDSHAKE_VALID); 347 } 348 349 return rc; 350} 351 352static int ocfs2_control_get_this_node(void) 353{ 354 int rc; 355 356 mutex_lock(&ocfs2_control_lock); 357 if (ocfs2_control_this_node < 0) 358 rc = -EINVAL; 359 else 360 rc = ocfs2_control_this_node; 361 mutex_unlock(&ocfs2_control_lock); 362 363 return rc; 364} 365 366static int ocfs2_control_do_setnode_msg(struct file *file, 367 struct ocfs2_control_message_setn *msg) 368{ 369 long nodenum; 370 char *ptr = NULL; 371 struct ocfs2_control_private *p = file->private_data; 372 373 if (ocfs2_control_get_handshake_state(file) != 374 OCFS2_CONTROL_HANDSHAKE_PROTOCOL) 375 return -EINVAL; 376 377 if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP, 378 OCFS2_CONTROL_MESSAGE_OP_LEN)) 379 return -EINVAL; 380 381 if ((msg->space != ' ') || (msg->newline != '\n')) 382 return -EINVAL; 383 msg->space = msg->newline = '\0'; 384 385 nodenum = simple_strtol(msg->nodestr, &ptr, 16); 386 if (!ptr || *ptr) 387 return -EINVAL; 388 389 if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) || 390 (nodenum > INT_MAX) || (nodenum < 0)) 391 return -ERANGE; 392 p->op_this_node = nodenum; 393 394 return ocfs2_control_install_private(file); 395} 396 397static int ocfs2_control_do_setversion_msg(struct file *file, 398 struct ocfs2_control_message_setv *msg) 399 { 400 long major, minor; 401 char *ptr = NULL; 402 struct ocfs2_control_private *p = file->private_data; 403 struct ocfs2_protocol_version *max = 404 &ocfs2_user_plugin.sp_max_proto; 405 406 if (ocfs2_control_get_handshake_state(file) != 407 OCFS2_CONTROL_HANDSHAKE_PROTOCOL) 408 return -EINVAL; 409 410 if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP, 411 OCFS2_CONTROL_MESSAGE_OP_LEN)) 412 return -EINVAL; 413 414 if ((msg->space1 != ' ') || (msg->space2 != ' ') || 415 (msg->newline != '\n')) 416 return -EINVAL; 417 msg->space1 = msg->space2 = msg->newline = '\0'; 418 419 major = simple_strtol(msg->major, &ptr, 16); 420 if (!ptr || *ptr) 421 return -EINVAL; 422 minor = simple_strtol(msg->minor, &ptr, 16); 423 if (!ptr || *ptr) 424 return -EINVAL; 425 426 /* 427 * The major must be between 1 and 255, inclusive. The minor 428 * must be between 0 and 255, inclusive. The version passed in 429 * must be within the maximum version supported by the filesystem. 430 */ 431 if ((major == LONG_MIN) || (major == LONG_MAX) || 432 (major > (u8)-1) || (major < 1)) 433 return -ERANGE; 434 if ((minor == LONG_MIN) || (minor == LONG_MAX) || 435 (minor > (u8)-1) || (minor < 0)) 436 return -ERANGE; 437 if ((major != max->pv_major) || 438 (minor > max->pv_minor)) 439 return -EINVAL; 440 441 p->op_proto.pv_major = major; 442 p->op_proto.pv_minor = minor; 443 444 return ocfs2_control_install_private(file); 445} 446 447static int ocfs2_control_do_down_msg(struct file *file, 448 struct ocfs2_control_message_down *msg) 449{ 450 long nodenum; 451 char *p = NULL; 452 453 if (ocfs2_control_get_handshake_state(file) != 454 OCFS2_CONTROL_HANDSHAKE_VALID) 455 return -EINVAL; 456 457 if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_DOWN_OP, 458 OCFS2_CONTROL_MESSAGE_OP_LEN)) 459 return -EINVAL; 460 461 if ((msg->space1 != ' ') || (msg->space2 != ' ') || 462 (msg->newline != '\n')) 463 return -EINVAL; 464 msg->space1 = msg->space2 = msg->newline = '\0'; 465 466 nodenum = simple_strtol(msg->nodestr, &p, 16); 467 if (!p || *p) 468 return -EINVAL; 469 470 if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) || 471 (nodenum > INT_MAX) || (nodenum < 0)) 472 return -ERANGE; 473 474 ocfs2_control_send_down(msg->uuid, nodenum); 475 476 return 0; 477} 478 479static ssize_t ocfs2_control_message(struct file *file, 480 const char __user *buf, 481 size_t count) 482{ 483 ssize_t ret; 484 union ocfs2_control_message msg; 485 486 /* Try to catch padding issues */ 487 WARN_ON(offsetof(struct ocfs2_control_message_down, uuid) != 488 (sizeof(msg.u_down.tag) + sizeof(msg.u_down.space1))); 489 490 memset(&msg, 0, sizeof(union ocfs2_control_message)); 491 ret = ocfs2_control_cfu(&msg, count, buf, count); 492 if (ret) 493 goto out; 494 495 if ((count == OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN) && 496 !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP, 497 OCFS2_CONTROL_MESSAGE_OP_LEN)) 498 ret = ocfs2_control_do_setnode_msg(file, &msg.u_setn); 499 else if ((count == OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN) && 500 !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP, 501 OCFS2_CONTROL_MESSAGE_OP_LEN)) 502 ret = ocfs2_control_do_setversion_msg(file, &msg.u_setv); 503 else if ((count == OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN) && 504 !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_DOWN_OP, 505 OCFS2_CONTROL_MESSAGE_OP_LEN)) 506 ret = ocfs2_control_do_down_msg(file, &msg.u_down); 507 else 508 ret = -EINVAL; 509 510out: 511 return ret ? ret : count; 512} 513 514static ssize_t ocfs2_control_write(struct file *file, 515 const char __user *buf, 516 size_t count, 517 loff_t *ppos) 518{ 519 ssize_t ret; 520 521 switch (ocfs2_control_get_handshake_state(file)) { 522 case OCFS2_CONTROL_HANDSHAKE_INVALID: 523 ret = -EINVAL; 524 break; 525 526 case OCFS2_CONTROL_HANDSHAKE_READ: 527 ret = ocfs2_control_validate_protocol(file, buf, 528 count); 529 break; 530 531 case OCFS2_CONTROL_HANDSHAKE_PROTOCOL: 532 case OCFS2_CONTROL_HANDSHAKE_VALID: 533 ret = ocfs2_control_message(file, buf, count); 534 break; 535 536 default: 537 BUG(); 538 ret = -EIO; 539 break; 540 } 541 542 return ret; 543} 544 545/* 546 * This is a naive version. If we ever have a new protocol, we'll expand 547 * it. Probably using seq_file. 548 */ 549static ssize_t ocfs2_control_read(struct file *file, 550 char __user *buf, 551 size_t count, 552 loff_t *ppos) 553{ 554 ssize_t ret; 555 556 ret = simple_read_from_buffer(buf, count, ppos, 557 OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN); 558 559 /* Have we read the whole protocol list? */ 560 if (ret > 0 && *ppos >= OCFS2_CONTROL_PROTO_LEN) 561 ocfs2_control_set_handshake_state(file, 562 OCFS2_CONTROL_HANDSHAKE_READ); 563 564 return ret; 565} 566 567static int ocfs2_control_release(struct inode *inode, struct file *file) 568{ 569 struct ocfs2_control_private *p = file->private_data; 570 571 mutex_lock(&ocfs2_control_lock); 572 573 if (ocfs2_control_get_handshake_state(file) != 574 OCFS2_CONTROL_HANDSHAKE_VALID) 575 goto out; 576 577 if (atomic_dec_and_test(&ocfs2_control_opened)) { 578 if (!list_empty(&ocfs2_live_connection_list)) { 579 printk(KERN_ERR 580 "ocfs2: Unexpected release of ocfs2_control!\n" 581 " Loss of cluster connection requires " 582 "an emergency restart!\n"); 583 emergency_restart(); 584 } 585 /* 586 * Last valid close clears the node number and resets 587 * the locking protocol version 588 */ 589 ocfs2_control_this_node = -1; 590 running_proto.pv_major = 0; 591 running_proto.pv_major = 0; 592 } 593 594out: 595 list_del_init(&p->op_list); 596 file->private_data = NULL; 597 598 mutex_unlock(&ocfs2_control_lock); 599 600 kfree(p); 601 602 return 0; 603} 604 605static int ocfs2_control_open(struct inode *inode, struct file *file) 606{ 607 struct ocfs2_control_private *p; 608 609 p = kzalloc(sizeof(struct ocfs2_control_private), GFP_KERNEL); 610 if (!p) 611 return -ENOMEM; 612 p->op_this_node = -1; 613 614 lock_kernel(); 615 mutex_lock(&ocfs2_control_lock); 616 file->private_data = p; 617 list_add(&p->op_list, &ocfs2_control_private_list); 618 mutex_unlock(&ocfs2_control_lock); 619 unlock_kernel(); 620 621 return 0; 622} 623 624static const struct file_operations ocfs2_control_fops = { 625 .open = ocfs2_control_open, 626 .release = ocfs2_control_release, 627 .read = ocfs2_control_read, 628 .write = ocfs2_control_write, 629 .owner = THIS_MODULE, 630}; 631 632static struct miscdevice ocfs2_control_device = { 633 .minor = MISC_DYNAMIC_MINOR, 634 .name = "ocfs2_control", 635 .fops = &ocfs2_control_fops, 636}; 637 638static int ocfs2_control_init(void) 639{ 640 int rc; 641 642 atomic_set(&ocfs2_control_opened, 0); 643 644 rc = misc_register(&ocfs2_control_device); 645 if (rc) 646 printk(KERN_ERR 647 "ocfs2: Unable to register ocfs2_control device " 648 "(errno %d)\n", 649 -rc); 650 651 return rc; 652} 653 654static void ocfs2_control_exit(void) 655{ 656 int rc; 657 658 rc = misc_deregister(&ocfs2_control_device); 659 if (rc) 660 printk(KERN_ERR 661 "ocfs2: Unable to deregister ocfs2_control device " 662 "(errno %d)\n", 663 -rc); 664} 665 666static void fsdlm_lock_ast_wrapper(void *astarg) 667{ 668 struct ocfs2_dlm_lksb *lksb = astarg; 669 int status = lksb->lksb_fsdlm.sb_status; 670 671 /* 672 * For now we're punting on the issue of other non-standard errors 673 * where we can't tell if the unlock_ast or lock_ast should be called. 674 * The main "other error" that's possible is EINVAL which means the 675 * function was called with invalid args, which shouldn't be possible 676 * since the caller here is under our control. Other non-standard 677 * errors probably fall into the same category, or otherwise are fatal 678 * which means we can't carry on anyway. 679 */ 680 681 if (status == -DLM_EUNLOCK || status == -DLM_ECANCEL) 682 lksb->lksb_conn->cc_proto->lp_unlock_ast(lksb, 0); 683 else 684 lksb->lksb_conn->cc_proto->lp_lock_ast(lksb); 685} 686 687static void fsdlm_blocking_ast_wrapper(void *astarg, int level) 688{ 689 struct ocfs2_dlm_lksb *lksb = astarg; 690 691 lksb->lksb_conn->cc_proto->lp_blocking_ast(lksb, level); 692} 693 694static int user_dlm_lock(struct ocfs2_cluster_connection *conn, 695 int mode, 696 struct ocfs2_dlm_lksb *lksb, 697 u32 flags, 698 void *name, 699 unsigned int namelen) 700{ 701 int ret; 702 703 if (!lksb->lksb_fsdlm.sb_lvbptr) 704 lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb + 705 sizeof(struct dlm_lksb); 706 707 ret = dlm_lock(conn->cc_lockspace, mode, &lksb->lksb_fsdlm, 708 flags|DLM_LKF_NODLCKWT, name, namelen, 0, 709 fsdlm_lock_ast_wrapper, lksb, 710 fsdlm_blocking_ast_wrapper); 711 return ret; 712} 713 714static int user_dlm_unlock(struct ocfs2_cluster_connection *conn, 715 struct ocfs2_dlm_lksb *lksb, 716 u32 flags) 717{ 718 int ret; 719 720 ret = dlm_unlock(conn->cc_lockspace, lksb->lksb_fsdlm.sb_lkid, 721 flags, &lksb->lksb_fsdlm, lksb); 722 return ret; 723} 724 725static int user_dlm_lock_status(struct ocfs2_dlm_lksb *lksb) 726{ 727 return lksb->lksb_fsdlm.sb_status; 728} 729 730static int user_dlm_lvb_valid(struct ocfs2_dlm_lksb *lksb) 731{ 732 int invalid = lksb->lksb_fsdlm.sb_flags & DLM_SBF_VALNOTVALID; 733 734 return !invalid; 735} 736 737static void *user_dlm_lvb(struct ocfs2_dlm_lksb *lksb) 738{ 739 if (!lksb->lksb_fsdlm.sb_lvbptr) 740 lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb + 741 sizeof(struct dlm_lksb); 742 return (void *)(lksb->lksb_fsdlm.sb_lvbptr); 743} 744 745static void user_dlm_dump_lksb(struct ocfs2_dlm_lksb *lksb) 746{ 747} 748 749static int user_plock(struct ocfs2_cluster_connection *conn, 750 u64 ino, 751 struct file *file, 752 int cmd, 753 struct file_lock *fl) 754{ 755 /* 756 * This more or less just demuxes the plock request into any 757 * one of three dlm calls. 758 * 759 * Internally, fs/dlm will pass these to a misc device, which 760 * a userspace daemon will read and write to. 761 * 762 * For now, cancel requests (which happen internally only), 763 * are turned into unlocks. Most of this function taken from 764 * gfs2_lock. 765 */ 766 767 if (cmd == F_CANCELLK) { 768 cmd = F_SETLK; 769 fl->fl_type = F_UNLCK; 770 } 771 772 if (IS_GETLK(cmd)) 773 return dlm_posix_get(conn->cc_lockspace, ino, file, fl); 774 else if (fl->fl_type == F_UNLCK) 775 return dlm_posix_unlock(conn->cc_lockspace, ino, file, fl); 776 else 777 return dlm_posix_lock(conn->cc_lockspace, ino, file, cmd, fl); 778} 779 780/* 781 * Compare a requested locking protocol version against the current one. 782 * 783 * If the major numbers are different, they are incompatible. 784 * If the current minor is greater than the request, they are incompatible. 785 * If the current minor is less than or equal to the request, they are 786 * compatible, and the requester should run at the current minor version. 787 */ 788static int fs_protocol_compare(struct ocfs2_protocol_version *existing, 789 struct ocfs2_protocol_version *request) 790{ 791 if (existing->pv_major != request->pv_major) 792 return 1; 793 794 if (existing->pv_minor > request->pv_minor) 795 return 1; 796 797 if (existing->pv_minor < request->pv_minor) 798 request->pv_minor = existing->pv_minor; 799 800 return 0; 801} 802 803static int user_cluster_connect(struct ocfs2_cluster_connection *conn) 804{ 805 dlm_lockspace_t *fsdlm; 806 struct ocfs2_live_connection *uninitialized_var(control); 807 int rc = 0; 808 809 BUG_ON(conn == NULL); 810 811 rc = ocfs2_live_connection_new(conn, &control); 812 if (rc) 813 goto out; 814 815 /* 816 * running_proto must have been set before we allowed any mounts 817 * to proceed. 818 */ 819 if (fs_protocol_compare(&running_proto, &conn->cc_version)) { 820 printk(KERN_ERR 821 "Unable to mount with fs locking protocol version " 822 "%u.%u because the userspace control daemon has " 823 "negotiated %u.%u\n", 824 conn->cc_version.pv_major, conn->cc_version.pv_minor, 825 running_proto.pv_major, running_proto.pv_minor); 826 rc = -EPROTO; 827 ocfs2_live_connection_drop(control); 828 goto out; 829 } 830 831 rc = dlm_new_lockspace(conn->cc_name, strlen(conn->cc_name), 832 &fsdlm, DLM_LSFL_FS, DLM_LVB_LEN); 833 if (rc) { 834 ocfs2_live_connection_drop(control); 835 goto out; 836 } 837 838 conn->cc_private = control; 839 conn->cc_lockspace = fsdlm; 840out: 841 return rc; 842} 843 844static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn) 845{ 846 dlm_release_lockspace(conn->cc_lockspace, 2); 847 conn->cc_lockspace = NULL; 848 ocfs2_live_connection_drop(conn->cc_private); 849 conn->cc_private = NULL; 850 return 0; 851} 852 853static int user_cluster_this_node(unsigned int *this_node) 854{ 855 int rc; 856 857 rc = ocfs2_control_get_this_node(); 858 if (rc < 0) 859 return rc; 860 861 *this_node = rc; 862 return 0; 863} 864 865static struct ocfs2_stack_operations ocfs2_user_plugin_ops = { 866 .connect = user_cluster_connect, 867 .disconnect = user_cluster_disconnect, 868 .this_node = user_cluster_this_node, 869 .dlm_lock = user_dlm_lock, 870 .dlm_unlock = user_dlm_unlock, 871 .lock_status = user_dlm_lock_status, 872 .lvb_valid = user_dlm_lvb_valid, 873 .lock_lvb = user_dlm_lvb, 874 .plock = user_plock, 875 .dump_lksb = user_dlm_dump_lksb, 876}; 877 878static struct ocfs2_stack_plugin ocfs2_user_plugin = { 879 .sp_name = "user", 880 .sp_ops = &ocfs2_user_plugin_ops, 881 .sp_owner = THIS_MODULE, 882}; 883 884 885static int __init ocfs2_user_plugin_init(void) 886{ 887 int rc; 888 889 rc = ocfs2_control_init(); 890 if (!rc) { 891 rc = ocfs2_stack_glue_register(&ocfs2_user_plugin); 892 if (rc) 893 ocfs2_control_exit(); 894 } 895 896 return rc; 897} 898 899static void __exit ocfs2_user_plugin_exit(void) 900{ 901 ocfs2_stack_glue_unregister(&ocfs2_user_plugin); 902 ocfs2_control_exit(); 903} 904 905MODULE_AUTHOR("Oracle"); 906MODULE_DESCRIPTION("ocfs2 driver for userspace cluster stacks"); 907MODULE_LICENSE("GPL"); 908module_init(ocfs2_user_plugin_init); 909module_exit(ocfs2_user_plugin_exit); 910