1/* 2 * Driver giving user-space access to the kernel's xenbus connection 3 * to xenstore. 4 * 5 * Copyright (c) 2005, Christian Limpach 6 * Copyright (c) 2005, Rusty Russell, IBM Corporation 7 * 8 * This program is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public License version 2 10 * as published by the Free Software Foundation; or, when distributed 11 * separately from the Linux kernel or incorporated into other 12 * software packages, subject to the following license: 13 * 14 * Permission is hereby granted, free of charge, to any person obtaining a copy 15 * of this source file (the "Software"), to deal in the Software without 16 * restriction, including without limitation the rights to use, copy, modify, 17 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 18 * and to permit persons to whom the Software is furnished to do so, subject to 19 * the following conditions: 20 * 21 * The above copyright notice and this permission notice shall be included in 22 * all copies or substantial portions of the Software. 23 * 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 25 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 26 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 27 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 28 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 29 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 30 * IN THE SOFTWARE. 31 * 32 * Changes: 33 * 2008-10-07 Alex Zeffertt Replaced /proc/xen/xenbus with xenfs filesystem 34 * and /proc/xen compatibility mount point. 35 * Turned xenfs into a loadable module. 36 */ 37 38#include <linux/kernel.h> 39#include <linux/errno.h> 40#include <linux/uio.h> 41#include <linux/notifier.h> 42#include <linux/wait.h> 43#include <linux/fs.h> 44#include <linux/poll.h> 45#include <linux/mutex.h> 46#include <linux/sched.h> 47#include <linux/spinlock.h> 48#include <linux/mount.h> 49#include <linux/pagemap.h> 50#include <linux/uaccess.h> 51#include <linux/init.h> 52#include <linux/namei.h> 53#include <linux/string.h> 54#include <linux/slab.h> 55 56#include "xenfs.h" 57#include "../xenbus/xenbus_comms.h" 58 59#include <xen/xenbus.h> 60#include <asm/xen/hypervisor.h> 61 62/* 63 * An element of a list of outstanding transactions, for which we're 64 * still waiting a reply. 65 */ 66struct xenbus_transaction_holder { 67 struct list_head list; 68 struct xenbus_transaction handle; 69}; 70 71/* 72 * A buffer of data on the queue. 73 */ 74struct read_buffer { 75 struct list_head list; 76 unsigned int cons; 77 unsigned int len; 78 char msg[]; 79}; 80 81struct xenbus_file_priv { 82 /* 83 * msgbuffer_mutex is held while partial requests are built up 84 * and complete requests are acted on. It therefore protects 85 * the "transactions" and "watches" lists, and the partial 86 * request length and buffer. 87 * 88 * reply_mutex protects the reply being built up to return to 89 * usermode. It nests inside msgbuffer_mutex but may be held 90 * alone during a watch callback. 91 */ 92 struct mutex msgbuffer_mutex; 93 94 /* In-progress transactions */ 95 struct list_head transactions; 96 97 /* Active watches. */ 98 struct list_head watches; 99 100 /* Partial request. */ 101 unsigned int len; 102 union { 103 struct xsd_sockmsg msg; 104 char buffer[PAGE_SIZE]; 105 } u; 106 107 /* Response queue. */ 108 struct mutex reply_mutex; 109 struct list_head read_buffers; 110 wait_queue_head_t read_waitq; 111 112}; 113 114/* Read out any raw xenbus messages queued up. */ 115static ssize_t xenbus_file_read(struct file *filp, 116 char __user *ubuf, 117 size_t len, loff_t *ppos) 118{ 119 struct xenbus_file_priv *u = filp->private_data; 120 struct read_buffer *rb; 121 unsigned i; 122 int ret; 123 124 mutex_lock(&u->reply_mutex); 125 while (list_empty(&u->read_buffers)) { 126 mutex_unlock(&u->reply_mutex); 127 if (filp->f_flags & O_NONBLOCK) 128 return -EAGAIN; 129 130 ret = wait_event_interruptible(u->read_waitq, 131 !list_empty(&u->read_buffers)); 132 if (ret) 133 return ret; 134 mutex_lock(&u->reply_mutex); 135 } 136 137 rb = list_entry(u->read_buffers.next, struct read_buffer, list); 138 i = 0; 139 while (i < len) { 140 unsigned sz = min((unsigned)len - i, rb->len - rb->cons); 141 142 ret = copy_to_user(ubuf + i, &rb->msg[rb->cons], sz); 143 144 i += sz - ret; 145 rb->cons += sz - ret; 146 147 if (ret != sz) { 148 if (i == 0) 149 i = -EFAULT; 150 goto out; 151 } 152 153 /* Clear out buffer if it has been consumed */ 154 if (rb->cons == rb->len) { 155 list_del(&rb->list); 156 kfree(rb); 157 if (list_empty(&u->read_buffers)) 158 break; 159 rb = list_entry(u->read_buffers.next, 160 struct read_buffer, list); 161 } 162 } 163 164out: 165 mutex_unlock(&u->reply_mutex); 166 return i; 167} 168 169/* 170 * Add a buffer to the queue. Caller must hold the appropriate lock 171 * if the queue is not local. (Commonly the caller will build up 172 * multiple queued buffers on a temporary local list, and then add it 173 * to the appropriate list under lock once all the buffers have een 174 * successfully allocated.) 175 */ 176static int queue_reply(struct list_head *queue, const void *data, size_t len) 177{ 178 struct read_buffer *rb; 179 180 if (len == 0) 181 return 0; 182 183 rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL); 184 if (rb == NULL) 185 return -ENOMEM; 186 187 rb->cons = 0; 188 rb->len = len; 189 190 memcpy(rb->msg, data, len); 191 192 list_add_tail(&rb->list, queue); 193 return 0; 194} 195 196/* 197 * Free all the read_buffer s on a list. 198 * Caller must have sole reference to list. 199 */ 200static void queue_cleanup(struct list_head *list) 201{ 202 struct read_buffer *rb; 203 204 while (!list_empty(list)) { 205 rb = list_entry(list->next, struct read_buffer, list); 206 list_del(list->next); 207 kfree(rb); 208 } 209} 210 211struct watch_adapter { 212 struct list_head list; 213 struct xenbus_watch watch; 214 struct xenbus_file_priv *dev_data; 215 char *token; 216}; 217 218static void free_watch_adapter(struct watch_adapter *watch) 219{ 220 kfree(watch->watch.node); 221 kfree(watch->token); 222 kfree(watch); 223} 224 225static struct watch_adapter *alloc_watch_adapter(const char *path, 226 const char *token) 227{ 228 struct watch_adapter *watch; 229 230 watch = kzalloc(sizeof(*watch), GFP_KERNEL); 231 if (watch == NULL) 232 goto out_fail; 233 234 watch->watch.node = kstrdup(path, GFP_KERNEL); 235 if (watch->watch.node == NULL) 236 goto out_free; 237 238 watch->token = kstrdup(token, GFP_KERNEL); 239 if (watch->token == NULL) 240 goto out_free; 241 242 return watch; 243 244out_free: 245 free_watch_adapter(watch); 246 247out_fail: 248 return NULL; 249} 250 251static void watch_fired(struct xenbus_watch *watch, 252 const char **vec, 253 unsigned int len) 254{ 255 struct watch_adapter *adap; 256 struct xsd_sockmsg hdr; 257 const char *path, *token; 258 int path_len, tok_len, body_len, data_len = 0; 259 int ret; 260 LIST_HEAD(staging_q); 261 262 adap = container_of(watch, struct watch_adapter, watch); 263 264 path = vec[XS_WATCH_PATH]; 265 token = adap->token; 266 267 path_len = strlen(path) + 1; 268 tok_len = strlen(token) + 1; 269 if (len > 2) 270 data_len = vec[len] - vec[2] + 1; 271 body_len = path_len + tok_len + data_len; 272 273 hdr.type = XS_WATCH_EVENT; 274 hdr.len = body_len; 275 276 mutex_lock(&adap->dev_data->reply_mutex); 277 278 ret = queue_reply(&staging_q, &hdr, sizeof(hdr)); 279 if (!ret) 280 ret = queue_reply(&staging_q, path, path_len); 281 if (!ret) 282 ret = queue_reply(&staging_q, token, tok_len); 283 if (!ret && len > 2) 284 ret = queue_reply(&staging_q, vec[2], data_len); 285 286 if (!ret) { 287 /* success: pass reply list onto watcher */ 288 list_splice_tail(&staging_q, &adap->dev_data->read_buffers); 289 wake_up(&adap->dev_data->read_waitq); 290 } else 291 queue_cleanup(&staging_q); 292 293 mutex_unlock(&adap->dev_data->reply_mutex); 294} 295 296static int xenbus_write_transaction(unsigned msg_type, 297 struct xenbus_file_priv *u) 298{ 299 int rc; 300 void *reply; 301 struct xenbus_transaction_holder *trans = NULL; 302 LIST_HEAD(staging_q); 303 304 if (msg_type == XS_TRANSACTION_START) { 305 trans = kmalloc(sizeof(*trans), GFP_KERNEL); 306 if (!trans) { 307 rc = -ENOMEM; 308 goto out; 309 } 310 } 311 312 reply = xenbus_dev_request_and_reply(&u->u.msg); 313 if (IS_ERR(reply)) { 314 kfree(trans); 315 rc = PTR_ERR(reply); 316 goto out; 317 } 318 319 if (msg_type == XS_TRANSACTION_START) { 320 trans->handle.id = simple_strtoul(reply, NULL, 0); 321 322 list_add(&trans->list, &u->transactions); 323 } else if (msg_type == XS_TRANSACTION_END) { 324 list_for_each_entry(trans, &u->transactions, list) 325 if (trans->handle.id == u->u.msg.tx_id) 326 break; 327 BUG_ON(&trans->list == &u->transactions); 328 list_del(&trans->list); 329 330 kfree(trans); 331 } 332 333 mutex_lock(&u->reply_mutex); 334 rc = queue_reply(&staging_q, &u->u.msg, sizeof(u->u.msg)); 335 if (!rc) 336 rc = queue_reply(&staging_q, reply, u->u.msg.len); 337 if (!rc) { 338 list_splice_tail(&staging_q, &u->read_buffers); 339 wake_up(&u->read_waitq); 340 } else { 341 queue_cleanup(&staging_q); 342 } 343 mutex_unlock(&u->reply_mutex); 344 345 kfree(reply); 346 347out: 348 return rc; 349} 350 351static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u) 352{ 353 struct watch_adapter *watch, *tmp_watch; 354 char *path, *token; 355 int err, rc; 356 LIST_HEAD(staging_q); 357 358 path = u->u.buffer + sizeof(u->u.msg); 359 token = memchr(path, 0, u->u.msg.len); 360 if (token == NULL) { 361 rc = -EILSEQ; 362 goto out; 363 } 364 token++; 365 366 if (msg_type == XS_WATCH) { 367 watch = alloc_watch_adapter(path, token); 368 if (watch == NULL) { 369 rc = -ENOMEM; 370 goto out; 371 } 372 373 watch->watch.callback = watch_fired; 374 watch->dev_data = u; 375 376 err = register_xenbus_watch(&watch->watch); 377 if (err) { 378 free_watch_adapter(watch); 379 rc = err; 380 goto out; 381 } 382 list_add(&watch->list, &u->watches); 383 } else { 384 list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) { 385 if (!strcmp(watch->token, token) && 386 !strcmp(watch->watch.node, path)) { 387 unregister_xenbus_watch(&watch->watch); 388 list_del(&watch->list); 389 free_watch_adapter(watch); 390 break; 391 } 392 } 393 } 394 395 /* Success. Synthesize a reply to say all is OK. */ 396 { 397 struct { 398 struct xsd_sockmsg hdr; 399 char body[3]; 400 } __packed reply = { 401 { 402 .type = msg_type, 403 .len = sizeof(reply.body) 404 }, 405 "OK" 406 }; 407 408 mutex_lock(&u->reply_mutex); 409 rc = queue_reply(&u->read_buffers, &reply, sizeof(reply)); 410 mutex_unlock(&u->reply_mutex); 411 } 412 413out: 414 return rc; 415} 416 417static ssize_t xenbus_file_write(struct file *filp, 418 const char __user *ubuf, 419 size_t len, loff_t *ppos) 420{ 421 struct xenbus_file_priv *u = filp->private_data; 422 uint32_t msg_type; 423 int rc = len; 424 int ret; 425 LIST_HEAD(staging_q); 426 427 /* 428 * We're expecting usermode to be writing properly formed 429 * xenbus messages. If they write an incomplete message we 430 * buffer it up. Once it is complete, we act on it. 431 */ 432 433 /* 434 * Make sure concurrent writers can't stomp all over each 435 * other's messages and make a mess of our partial message 436 * buffer. We don't make any attemppt to stop multiple 437 * writers from making a mess of each other's incomplete 438 * messages; we're just trying to guarantee our own internal 439 * consistency and make sure that single writes are handled 440 * atomically. 441 */ 442 mutex_lock(&u->msgbuffer_mutex); 443 444 /* Get this out of the way early to avoid confusion */ 445 if (len == 0) 446 goto out; 447 448 /* Can't write a xenbus message larger we can buffer */ 449 if ((len + u->len) > sizeof(u->u.buffer)) { 450 /* On error, dump existing buffer */ 451 u->len = 0; 452 rc = -EINVAL; 453 goto out; 454 } 455 456 ret = copy_from_user(u->u.buffer + u->len, ubuf, len); 457 458 if (ret == len) { 459 rc = -EFAULT; 460 goto out; 461 } 462 463 /* Deal with a partial copy. */ 464 len -= ret; 465 rc = len; 466 467 u->len += len; 468 469 /* Return if we haven't got a full message yet */ 470 if (u->len < sizeof(u->u.msg)) 471 goto out; /* not even the header yet */ 472 473 /* If we're expecting a message that's larger than we can 474 possibly send, dump what we have and return an error. */ 475 if ((sizeof(u->u.msg) + u->u.msg.len) > sizeof(u->u.buffer)) { 476 rc = -E2BIG; 477 u->len = 0; 478 goto out; 479 } 480 481 if (u->len < (sizeof(u->u.msg) + u->u.msg.len)) 482 goto out; /* incomplete data portion */ 483 484 /* 485 * OK, now we have a complete message. Do something with it. 486 */ 487 488 msg_type = u->u.msg.type; 489 490 switch (msg_type) { 491 case XS_TRANSACTION_START: 492 case XS_TRANSACTION_END: 493 case XS_DIRECTORY: 494 case XS_READ: 495 case XS_GET_PERMS: 496 case XS_RELEASE: 497 case XS_GET_DOMAIN_PATH: 498 case XS_WRITE: 499 case XS_MKDIR: 500 case XS_RM: 501 case XS_SET_PERMS: 502 /* Send out a transaction */ 503 ret = xenbus_write_transaction(msg_type, u); 504 break; 505 506 case XS_WATCH: 507 case XS_UNWATCH: 508 /* (Un)Ask for some path to be watched for changes */ 509 ret = xenbus_write_watch(msg_type, u); 510 break; 511 512 default: 513 ret = -EINVAL; 514 break; 515 } 516 if (ret != 0) 517 rc = ret; 518 519 /* Buffered message consumed */ 520 u->len = 0; 521 522 out: 523 mutex_unlock(&u->msgbuffer_mutex); 524 return rc; 525} 526 527static int xenbus_file_open(struct inode *inode, struct file *filp) 528{ 529 struct xenbus_file_priv *u; 530 531 if (xen_store_evtchn == 0) 532 return -ENOENT; 533 534 nonseekable_open(inode, filp); 535 536 u = kzalloc(sizeof(*u), GFP_KERNEL); 537 if (u == NULL) 538 return -ENOMEM; 539 540 INIT_LIST_HEAD(&u->transactions); 541 INIT_LIST_HEAD(&u->watches); 542 INIT_LIST_HEAD(&u->read_buffers); 543 init_waitqueue_head(&u->read_waitq); 544 545 mutex_init(&u->reply_mutex); 546 mutex_init(&u->msgbuffer_mutex); 547 548 filp->private_data = u; 549 550 return 0; 551} 552 553static int xenbus_file_release(struct inode *inode, struct file *filp) 554{ 555 struct xenbus_file_priv *u = filp->private_data; 556 struct xenbus_transaction_holder *trans, *tmp; 557 struct watch_adapter *watch, *tmp_watch; 558 559 /* 560 * No need for locking here because there are no other users, 561 * by definition. 562 */ 563 564 list_for_each_entry_safe(trans, tmp, &u->transactions, list) { 565 xenbus_transaction_end(trans->handle, 1); 566 list_del(&trans->list); 567 kfree(trans); 568 } 569 570 list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) { 571 unregister_xenbus_watch(&watch->watch); 572 list_del(&watch->list); 573 free_watch_adapter(watch); 574 } 575 576 kfree(u); 577 578 return 0; 579} 580 581static unsigned int xenbus_file_poll(struct file *file, poll_table *wait) 582{ 583 struct xenbus_file_priv *u = file->private_data; 584 585 poll_wait(file, &u->read_waitq, wait); 586 if (!list_empty(&u->read_buffers)) 587 return POLLIN | POLLRDNORM; 588 return 0; 589} 590 591const struct file_operations xenbus_file_ops = { 592 .read = xenbus_file_read, 593 .write = xenbus_file_write, 594 .open = xenbus_file_open, 595 .release = xenbus_file_release, 596 .poll = xenbus_file_poll, 597}; 598