/*
 * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */

#include <linux/module.h>
#include <linux/crypto.h>
#include <linux/fs.h>
#include <linux/jhash.h>
#include <linux/hash.h>
#include <linux/ktime.h>
#include <linux/mempool.h>
#include <linux/mm.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/parser.h>
#include <linux/poll.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/statfs.h>
#include <linux/writeback.h>

#include "netfs.h"

/* Slab cache and mempool backing struct netfs_trans_dst allocations. */
static struct kmem_cache *netfs_trans_dst;
static mempool_t *netfs_trans_dst_pool;

/*
 * One-time setup of transaction fields that never change afterwards:
 * page array capacity, total buffer size, the initial reference, and
 * the list/lock used to track the remote destinations this transaction
 * has been pushed to.
 */
static void netfs_trans_init_static(struct netfs_trans *t, int num, int size)
{
	t->page_num = num;
	t->total_size = size;
	atomic_set(&t->refcnt, 1);

	spin_lock_init(&t->dst_lock);
	INIT_LIST_HEAD(&t->dst_list);
}

/*
 * Send every attached page of the transaction over the state's socket,
 * each preceded by a NETFS_WRITE_PAGE command header carrying the byte
 * count and file offset.  Returns 0 on success or a negative error; on
 * a send failure the network state is torn down via netfs_state_exit().
 */
static int netfs_trans_send_pages(struct netfs_trans *t, struct netfs_state *st)
{
	int err = 0;
	unsigned int i, attached_pages = t->attached_pages, ci;
	struct msghdr msg;
	/*
	 * When a crypto engine is attached, its page array (presumably
	 * holding the encrypted copies — confirm against the crypto
	 * code) is sent instead of the transaction's own pages.
	 */
	struct page **pages = (t->eng)?t->eng->pages:t->pages;
	struct page *p;
	unsigned int size;

	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = MSG_WAITALL | MSG_MORE;

	/*
	 * ci indexes @pages and only advances for attached slots, while
	 * i walks all page_num slots of t->pages (which may have holes).
	 */
	ci = 0;
	for (i=0; i<t->page_num; ++i) {
		struct page *page = pages[ci];
		struct netfs_cmd cmd;
		struct iovec io;

		p = t->pages[i];

		if (!p)
			continue;

		/* Valid byte count was stashed in the page's private data. */
		size = page_private(p);

		io.iov_base = &cmd;
		io.iov_len = sizeof(struct netfs_cmd);

		cmd.cmd = NETFS_WRITE_PAGE;
		cmd.ext = 0;
		cmd.id = 0;
		cmd.size = size;
		cmd.start = p->index;
		cmd.start <<= PAGE_CACHE_SHIFT;	/* page index -> byte offset */
		cmd.csize = 0;
		cmd.cpad = 0;
		cmd.iv = pohmelfs_gen_iv(t);

		/* Convert the header to on-the-wire byte order. */
		netfs_convert_cmd(&cmd);

		msg.msg_iov = &io;
		msg.msg_iovlen = 1;
		msg.msg_flags = MSG_WAITALL | MSG_MORE;

		err = kernel_sendmsg(st->socket, &msg, (struct kvec *)msg.msg_iov, 1, sizeof(struct netfs_cmd));
		if (err <= 0) {
			printk("%s: %d/%d failed to send transaction header: t: %p, gen: %u, err: %d.\n",
					__func__, i, t->page_num, t, t->gen, err);
			if (err == 0)
				err = -ECONNRESET;
			goto err_out;
		}

		/* Last attached page: drop MSG_MORE so the data is pushed out. */
		msg.msg_flags = MSG_WAITALL | (attached_pages == 1 ? 0 :
				MSG_MORE);

		err = kernel_sendpage(st->socket, page, 0, size, msg.msg_flags);
		if (err <= 0) {
			printk("%s: %d/%d failed to send transaction page: t: %p, gen: %u, size: %u, err: %d.\n",
					__func__, i, t->page_num, t, t->gen, size, err);
			if (err == 0)
				err = -ECONNRESET;
			goto err_out;
		}

		dprintk("%s: %d/%d sent t: %p, gen: %u, page: %p/%p, size: %u.\n",
			__func__, i, t->page_num, t, t->gen, page, p, size);

		err = 0;
		attached_pages--;
		if (!attached_pages)
			break;
		ci++;

		continue;

err_out:
		printk("%s: t: %p, gen: %u, err: %d.\n", __func__, t, t->gen, err);
		netfs_state_exit(st);
		break;
	}

	return err;
}

/*
 * Send the contiguous part of the transaction (command headers plus
 * inline data in t->iovec), followed by any attached pages.  The whole
 * operation is serialized against other senders with the state's send
 * lock, and the socket is (re)established first if needed.  The result
 * is recorded in t->result and returned.
 */
int netfs_trans_send(struct netfs_trans *t, struct netfs_state *st)
{
	int err;
	struct msghdr msg;

	BUG_ON(!t->iovec.iov_len);
	BUG_ON(t->iovec.iov_len > 1024*1024*1024);

	netfs_state_lock_send(st);
	if (!st->socket) {
		err = netfs_state_init(st);
		if (err)
			goto err_out_unlock_return;
	}

	msg.msg_iov = &t->iovec;
	msg.msg_iovlen = 1;
	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = MSG_WAITALL;

	/* Pages follow: tell the stack not to push this frame yet. */
	if (t->attached_pages)
		msg.msg_flags |= MSG_MORE;

	err = kernel_sendmsg(st->socket, &msg, (struct kvec *)msg.msg_iov, 1, t->iovec.iov_len);
	if (err <= 0) {
		printk("%s: failed to send contig transaction: t: %p, gen: %u, size: %zu, err: %d.\n",
				__func__, t, t->gen, t->iovec.iov_len, err);
		if (err == 0)
			err = -ECONNRESET;
		goto err_out_unlock_return;
	}

	dprintk("%s: sent %s transaction: t: %p, gen: %u, size: %zu, page_num: %u.\n",
			__func__, (t->page_num)?"partial":"full",
			t, t->gen, t->iovec.iov_len, t->page_num);

	err = 0;
	if (t->attached_pages)
		err = netfs_trans_send_pages(t, st);

err_out_unlock_return:

	/* A failed page send may have flagged the state for reset. */
	if (st->need_reset)
		netfs_state_exit(st);

	netfs_state_unlock_send(st);

	dprintk("%s: t: %p, gen: %u, err: %d.\n",
		__func__, t, t->gen, err);

	t->result = err;
	return err;
}

/*
 * Three-way comparison of transaction generation numbers:
 * returns 1 when @gen < @new, -1 when @gen > @new, 0 when equal.
 */
static inline int netfs_trans_cmp(unsigned int gen, unsigned int new)
{
	if (gen < new)
		return 1;
	if (gen > new)
		return -1;
	return 0;
}

/*
 * Look up the destination entry with generation @gen in the state's
 * rbtree.  Returns NULL if no such entry exists.  Callers are expected
 * to hold st->trans_lock (same lock the insert/remove paths take —
 * confirm at the call sites).
 */
struct netfs_trans_dst *netfs_trans_search(struct netfs_state *st, unsigned int gen)
{
	struct rb_root *root = &st->trans_root;
	struct rb_node *n = root->rb_node;
	struct netfs_trans_dst *tmp, *ret = NULL;
	struct netfs_trans *t;
	int cmp;

	while (n) {
		tmp = rb_entry(n, struct netfs_trans_dst, state_entry);
		t = tmp->trans;

		cmp = netfs_trans_cmp(t->gen, gen);
		if (cmp < 0)
			n = n->rb_left;
		else if (cmp > 0)
			n = n->rb_right;
		else {
			ret = tmp;
			break;
		}
	}

	return ret;
}

/*
 * Insert @ndst into the state's rbtree, keyed by the transaction's
 * generation number, and stamp its send time.  Called with
 * st->trans_lock held (see netfs_trans_push_dst()).  Returns -EEXIST
 * if an entry with the same generation is already present, 0 on
 * success.
 */
static int netfs_trans_insert(struct netfs_trans_dst *ndst, struct netfs_state *st)
{
	struct rb_root *root = &st->trans_root;
	struct rb_node **n = &root->rb_node, *parent = NULL;
	struct netfs_trans_dst *ret = NULL, *tmp;
	struct netfs_trans *t = NULL, *new = ndst->trans;
	int cmp;

	while (*n) {
		parent = *n;

		tmp = rb_entry(parent, struct netfs_trans_dst, state_entry);
		t = tmp->trans;

		cmp = netfs_trans_cmp(t->gen, new->gen);
		if (cmp < 0)
			n = &parent->rb_left;
		else if (cmp > 0)
			n = &parent->rb_right;
		else {
			ret = tmp;
			break;
		}
	}

	if (ret) {
		/* t still points at the duplicate's transaction here. */
		printk("%s: exist: old: gen: %u, flags: %x, send_time: %lu, "
			"new: gen: %u, flags: %x, send_time: %lu.\n",
			__func__, t->gen, t->flags, ret->send_time,
			new->gen, new->flags, ndst->send_time);
		return -EEXIST;
	}

	rb_link_node(&ndst->state_entry, parent, n);
	rb_insert_color(&ndst->state_entry, root);
	ndst->send_time = jiffies;

	return 0;
}

/*
 * Unlink @dst from the state's rbtree if it is currently linked, using
 * rb_parent_color as the "linked" marker.  Caller must hold
 * st->trans_lock.  Returns 1 if the entry was removed here, 0 if it
 * was not in the tree (already removed by someone else).
 */
int netfs_trans_remove_nolock(struct netfs_trans_dst *dst, struct netfs_state *st)
{
	if (dst && dst->state_entry.rb_parent_color) {
		rb_erase(&dst->state_entry, &st->trans_root);
		dst->state_entry.rb_parent_color = 0;
		return 1;
	}
	return 0;
}

/* Locked wrapper around netfs_trans_remove_nolock(). */
static int netfs_trans_remove_state(struct netfs_trans_dst *dst)
{
	int ret;
	struct netfs_state *st = dst->state;

	mutex_lock(&st->trans_lock);
	ret = netfs_trans_remove_nolock(dst, st);
	mutex_unlock(&st->trans_lock);

	return ret;
}

/*
 * Create new destination for given transaction associated with given network state.
 * Transaction's reference counter is bumped and will be dropped when either
 * reply is received or when async timeout detection task will fail resending
 * and drop transaction.
 */
static int netfs_trans_push_dst(struct netfs_trans *t, struct netfs_state *st)
{
	struct netfs_trans_dst *dst;
	int err;

	dst = mempool_alloc(netfs_trans_dst_pool, GFP_KERNEL);
	if (!dst)
		return -ENOMEM;

	dst->retries = 0;
	dst->send_time = 0;
	dst->state = st;
	dst->trans = t;
	netfs_trans_get(t);	/* paired with the put in netfs_trans_free_dst() */

	mutex_lock(&st->trans_lock);
	err = netfs_trans_insert(dst, st);
	mutex_unlock(&st->trans_lock);

	if (err)
		goto err_out_free;

	spin_lock(&t->dst_lock);
	list_add_tail(&dst->trans_entry, &t->dst_list);
	spin_unlock(&t->dst_lock);

	return 0;

err_out_free:
	t->result = err;
	netfs_trans_put(t);
	mempool_free(dst, netfs_trans_dst_pool);
	return err;
}

/*
 * Free a destination entry and drop the transaction reference it held
 * (taken in netfs_trans_push_dst()).  This may free the transaction
 * itself if this was the last reference.
 */
static void netfs_trans_free_dst(struct netfs_trans_dst *dst)
{
	netfs_trans_put(dst->trans);
	mempool_free(dst, netfs_trans_dst_pool);
}

/*
 * Remove @dst from its state's rbtree and free it.  A no-op when the
 * entry was already taken out of the tree by a concurrent path — in
 * that case whoever removed it is responsible for freeing it.
 */
static void netfs_trans_remove_dst(struct netfs_trans_dst *dst)
{
	if (netfs_trans_remove_state(dst))
		netfs_trans_free_dst(dst);
}

/*
 * Drop destination transaction entry when we know it: unlink it from
 * the transaction's destination list, then from the state's tree.
 */
void netfs_trans_drop_dst(struct netfs_trans_dst *dst)
{
	struct netfs_trans *t = dst->trans;

	spin_lock(&t->dst_lock);
	list_del_init(&dst->trans_entry);
	spin_unlock(&t->dst_lock);

	netfs_trans_remove_dst(dst);
}

/*
 * Drop destination transaction entry when we know it and when we
 * already removed dst from state tree — only the list unlink and the
 * final free/put remain to be done.
 */
void netfs_trans_drop_dst_nostate(struct netfs_trans_dst *dst)
{
	struct netfs_trans *t = dst->trans;

	spin_lock(&t->dst_lock);
	list_del_init(&dst->trans_entry);
	spin_unlock(&t->dst_lock);

	netfs_trans_free_dst(dst);
}

/*
 * This drops destination transaction entry from appropriate network state
 * tree and drops related reference counter. It is possible that transaction
 * will be freed here if its reference counter hits zero.
 * Destination transaction entry will be freed.
 */
void netfs_trans_drop_trans(struct netfs_trans *t, struct netfs_state *st)
{
	struct netfs_trans_dst *dst, *tmp, *ret = NULL;

	spin_lock(&t->dst_lock);
	list_for_each_entry_safe(dst, tmp, &t->dst_list, trans_entry) {
		if (dst->state == st) {
			ret = dst;
			list_del(&dst->trans_entry);
			break;
		}
	}
	spin_unlock(&t->dst_lock);

	if (ret)
		netfs_trans_remove_dst(ret);
}

/*
 * Like netfs_trans_drop_trans(), but optimized for the common caller
 * (netfs_trans_push() error path) where the entry to drop is the most
 * recently added one: the list tail is checked first and a full scan
 * is only done when the tail belongs to a different state.
 */
void netfs_trans_drop_last(struct netfs_trans *t, struct netfs_state *st)
{
	struct netfs_trans_dst *dst, *tmp, *ret;

	spin_lock(&t->dst_lock);
	ret = list_entry(t->dst_list.prev, struct netfs_trans_dst, trans_entry);
	if (ret->state != st) {
		ret = NULL;
		list_for_each_entry_safe(dst, tmp, &t->dst_list, trans_entry) {
			if (dst->state == st) {
				ret = dst;
				list_del_init(&dst->trans_entry);
				break;
			}
		}
	} else {
		list_del(&ret->trans_entry);
	}
	spin_unlock(&t->dst_lock);

	if (ret)
		netfs_trans_remove_dst(ret);
}

/*
 * Register the transaction as pending on @st and send it there.  On
 * send failure the destination entry just added is dropped again, so
 * the transaction is not left tracked on a state it never reached.
 */
static int netfs_trans_push(struct netfs_trans *t, struct netfs_state *st)
{
	int err;

	err = netfs_trans_push_dst(t, st);
	if (err)
		return err;

	err = netfs_trans_send(t, st);
	if (err)
		goto err_out_free;

	if (t->flags & NETFS_TRANS_SINGLE_DST)
		pohmelfs_switch_active(st->psb);

	return 0;

err_out_free:
	t->result = err;
	netfs_trans_drop_last(t, st);

	return err;
}

/*
 * Push the transaction to the matching network states of the
 * superblock: read-permitted states for single-destination
 * transactions, write-permitted states otherwise.  For
 * single-destination transactions a higher-or-equal priority active
 * state is preferred and iteration stops after the first successful
 * push.  Returns the last push error, or -ENODEV when no state
 * qualified at all.
 */
int netfs_trans_finish_send(struct netfs_trans *t, struct pohmelfs_sb *psb)
{
	struct pohmelfs_config *c;
	int err = -ENODEV;
	struct netfs_state *st;
	mutex_lock(&psb->state_lock);
	list_for_each_entry(c, &psb->state_list, config_entry) {
		st = &c->state;

		if (t->flags & NETFS_TRANS_SINGLE_DST) {
			if (!(st->ctl.perm & POHMELFS_IO_PERM_READ))
				continue;
		} else {
			if (!(st->ctl.perm & POHMELFS_IO_PERM_WRITE))
				continue;
		}

		/* Redirect single-destination I/O to the preferred active state. */
		if (psb->active_state && (psb->active_state->state.ctl.prio >= st->ctl.prio) &&
				(t->flags & NETFS_TRANS_SINGLE_DST))
			st = &psb->active_state->state;

		err = netfs_trans_push(t, st);
		if (!err && (t->flags & NETFS_TRANS_SINGLE_DST))
			break;
	}

	mutex_unlock(&psb->state_lock);
	if (err)
		t->result = err;
	return err;
}

/*
 * Finalize the transaction: assign a fresh generation number, fill in
 * the leading NETFS_TRANS command header (total payload size including
 * attached data and per-page headers, plus crypto metadata when crypto
 * is enabled), hand the transaction to pohmelfs_trans_crypt(), and
 * drop the caller's reference.  NOTE(review): sending appears to be
 * driven from the crypto path rather than here — confirm against
 * pohmelfs_trans_crypt().
 */
int netfs_trans_finish(struct netfs_trans *t, struct pohmelfs_sb *psb)
{
	int err;
	struct netfs_cmd *cmd = t->iovec.iov_base;

	t->gen = atomic_inc_return(&psb->trans_gen);

	cmd->size = t->iovec.iov_len - sizeof(struct netfs_cmd) +
		t->attached_size + t->attached_pages * sizeof(struct netfs_cmd);
	cmd->cmd = NETFS_TRANS;
	cmd->start = t->gen;
	cmd->id = 0;

	if (psb->perform_crypto) {
		cmd->ext = psb->crypto_attached_size;
		cmd->csize = psb->crypto_attached_size;
	}

	dprintk("%s: t: %u, size: %u, iov_len: %zu, attached_size: %u, attached_pages: %u.\n",
			__func__, t->gen, cmd->size, t->iovec.iov_len, t->attached_size, t->attached_pages);
	err = pohmelfs_trans_crypt(t, psb);
	if (err) {
		t->result = err;
		/* Undo the byte-order conversion for readable diagnostics. */
		netfs_convert_cmd(cmd);
		dprintk("%s: trans: %llu, crypto_attached_size: %u, attached_size: %u, attached_pages: %d, trans_size: %u, err: %d.\n",
				__func__, cmd->start, psb->crypto_attached_size, t->attached_size, t->attached_pages, cmd->size, err);
	}
	netfs_trans_put(t);
	return err;
}

/*
 * Resend transaction to remote server(s).
 * If new servers were added into superblock, we can try to send data
 * to them too.
 *
 * It is called under superblock's state_lock, so we can safely
 * dereference psb->state_list. Also, transaction's reference counter is
 * bumped, so it can not go away under us, thus we can safely access all
 * its members. State is locked.
 *
 * This function returns 0 if transaction was successfully sent to at
 * least one destination target.
 */
int netfs_trans_resend(struct netfs_trans *t, struct pohmelfs_sb *psb)
{
	struct netfs_trans_dst *dst;
	struct netfs_state *st;
	struct pohmelfs_config *c;
	int err, exist, error = -ENODEV;

	list_for_each_entry(c, &psb->state_list, config_entry) {
		st = &c->state;

		/* Was this state already a destination of the transaction? */
		exist = 0;
		spin_lock(&t->dst_lock);
		list_for_each_entry(dst, &t->dst_list, trans_entry) {
			if (st == dst->state) {
				exist = 1;
				break;
			}
		}
		spin_unlock(&t->dst_lock);

		if (exist) {
			/*
			 * Known destination: just retransmit.  For
			 * single-destination transactions only the last
			 * state in the list is retried.
			 */
			if (!(t->flags & NETFS_TRANS_SINGLE_DST) ||
					(c->config_entry.next == &psb->state_list)) {
				dprintk("%s: resending st: %p, t: %p, gen: %u.\n",
						__func__, st, t, t->gen);
				err = netfs_trans_send(t, st);
				if (!err)
					error = 0;
			}
			continue;
		}

		/* New state (e.g. freshly added server): full push. */
		dprintk("%s: pushing/resending st: %p, t: %p, gen: %u.\n",
				__func__, st, t, t->gen);
		err = netfs_trans_push(t, st);
		if (err)
			continue;
		error = 0;
		if (t->flags & NETFS_TRANS_SINGLE_DST)
			break;
	}

	t->result = error;
	return error;
}

/*
 * Reserve @size bytes in the transaction's contiguous buffer and
 * return a pointer to the reserved region.  Returns ERR_PTR(-EINVAL)
 * for an oversized request and ERR_PTR(-E2BIG) when the buffer has no
 * room left.
 */
void *netfs_trans_add(struct netfs_trans *t, unsigned int size)
{
	struct iovec *io = &t->iovec;
	void *ptr;

	if (size > t->total_size) {
		ptr = ERR_PTR(-EINVAL);
		goto out;
	}

	if (io->iov_len + size > t->total_size) {
		dprintk("%s: too big size t: %p, gen: %u, iov_len: %zu, size: %u, total: %u.\n",
				__func__, t, t->gen, io->iov_len, size, t->total_size);
		ptr = ERR_PTR(-E2BIG);
		goto out;
	}

	ptr = io->iov_base + io->iov_len;
	io->iov_len += size;

out:
	dprintk("%s: t: %p, gen: %u, size: %u, total: %zu.\n",
			__func__, t, t->gen, size, io->iov_len);
	return ptr;
}

/*
 * Free the transaction, returning its crypto thread (if any) to the
 * ready pool first.  The whole transaction, including its inline
 * buffer and page array, is a single kzalloc'ed block.
 */
void netfs_trans_free(struct netfs_trans *t)
{
	if (t->eng)
		pohmelfs_crypto_thread_make_ready(t->eng->thread);
	kfree(t);
}

/*
 * Allocate a transaction with an inline buffer of at least @size bytes
 * (rounded up for crypto alignment) and room for up to @nr page
 * pointers, all in one allocation laid out as described below.
 * Returns NULL on allocation failure.
 */
struct netfs_trans *netfs_trans_alloc(struct pohmelfs_sb *psb, unsigned int size,
		unsigned int flags, unsigned int nr)
{
	struct netfs_trans *t;
	unsigned int num, cont, pad, size_no_trans;
	unsigned int crypto_added = 0;
	struct netfs_cmd *cmd;

	if (psb->perform_crypto)
		crypto_added = psb->crypto_attached_size;

	/*
	 * |sizeof(struct netfs_trans)|
	 * |sizeof(struct netfs_cmd)| - transaction header
	 * |size| - buffer with requested size
	 * |padding| - crypto padding, zero bytes
	 * |nr * sizeof(struct page *)| - array of page pointers
	 *
	 * Overall size should be less than PAGE_SIZE for guaranteed allocation.
	 */

	cont = size;
	size = ALIGN(size, psb->crypto_align_size);
	pad = size - cont;	/* zero bytes added by crypto alignment */

	size_no_trans = size + sizeof(struct netfs_cmd) * 2 + crypto_added;

	cont = sizeof(struct netfs_trans) + size_no_trans;

	/*
	 * NOTE(review): if cont ever exceeded PAGE_SIZE this unsigned
	 * subtraction would wrap and yield a huge page count; relies on
	 * callers keeping @size well under a page — confirm.
	 */
	num = (PAGE_SIZE - cont)/sizeof(struct page *);

	/* Clamp the page array to what still fits in one page. */
	if (nr > num)
		nr = num;

	t = kzalloc(cont + nr*sizeof(struct page *), GFP_NOIO);
	if (!t)
		goto err_out_exit;

	/* Inline buffer sits right after the struct, pages after the buffer. */
	t->iovec.iov_base = (void *)(t + 1);
	t->pages = (struct page **)(t->iovec.iov_base + size_no_trans);

	/*
	 * Reserving space for transaction header.
	 */
	t->iovec.iov_len = sizeof(struct netfs_cmd) + crypto_added;

	netfs_trans_init_static(t, nr, size_no_trans);

	t->flags = flags;
	t->psb = psb;

	cmd = (struct netfs_cmd *)t->iovec.iov_base;

	cmd->size = size;
	cmd->cpad = pad;
	cmd->csize = crypto_added;

	dprintk("%s: t: %p, gen: %u, size: %u, padding: %u, align_size: %u, flags: %x, "
			"page_num: %u, base: %p, pages: %p.\n",
			__func__, t, t->gen, size, pad, psb->crypto_align_size, flags, nr,
			t->iovec.iov_base, t->pages);

	return t;

err_out_exit:
	return NULL;
}

/*
 * Module-init: create the slab cache and the mempool used for
 * destination entries.  Returns 0 on success, -ENOMEM on failure.
 */
int netfs_trans_init(void)
{
	int err = -ENOMEM;

	netfs_trans_dst = kmem_cache_create("netfs_trans_dst", sizeof(struct netfs_trans_dst),
			0, 0, NULL);
	if (!netfs_trans_dst)
		goto err_out_exit;

	netfs_trans_dst_pool = mempool_create_slab_pool(256, netfs_trans_dst);
	if (!netfs_trans_dst_pool)
		goto err_out_free;

	return 0;

err_out_free:
	kmem_cache_destroy(netfs_trans_dst);
err_out_exit:
	return err;
}

/* Module-exit: tear down the mempool and slab cache in reverse order. */
void netfs_trans_exit(void)
{
	mempool_destroy(netfs_trans_dst_pool);
	kmem_cache_destroy(netfs_trans_dst);
}