// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem high-level write support.
 *
 * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include "internal.h"

/**
 * netfs_create_write_request - Create a write operation.
 * @wreq: The write request that this operation is storing data from
 * @dest: The destination type
 * @start: Start of the region this write will modify
 * @len: Length of the modification
 * @worker: The worker function to handle the write(s)
 *
 * Allocate a write operation, set it up and add it to the list on a write
 * request.
 */
struct netfs_io_subrequest *netfs_create_write_request(struct netfs_io_request *wreq,
						       enum netfs_io_source dest,
						       loff_t start, size_t len,
						       work_func_t worker)
{
	struct netfs_io_subrequest *subreq;

	subreq = netfs_alloc_subrequest(wreq);
	if (subreq) {
		INIT_WORK(&subreq->work, worker);
		subreq->source = dest;
		subreq->start = start;
		subreq->len = len;
		subreq->debug_index = wreq->subreq_counter++;

		switch (subreq->source) {
		case NETFS_UPLOAD_TO_SERVER:
			netfs_stat(&netfs_n_wh_upload);
			break;
		case NETFS_WRITE_TO_CACHE:
			netfs_stat(&netfs_n_wh_write);
			break;
		default:
			BUG();
		}

		/* Slice the subrequest's view out of the request's iterator. */
		subreq->io_iter = wreq->io_iter;
		iov_iter_advance(&subreq->io_iter, subreq->start - wreq->start);
		iov_iter_truncate(&subreq->io_iter, subreq->len);

		trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
				     refcount_read(&subreq->ref),
				     netfs_sreq_trace_new);
		atomic_inc(&wreq->nr_outstanding);
		list_add_tail(&subreq->rreq_link, &wreq->subrequests);
		trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
	}

	return subreq;
}
EXPORT_SYMBOL(netfs_create_write_request);
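
/* Example: a minimal ->create_write_requests() implementation (an
 * illustrative sketch; "myfs" and myfs_upload_worker() are hypothetical
 * names, not part of this file).  The filesystem splits the span into
 * wsize-sized pieces, creates an upload subrequest for each and queues it,
 * handing its ref on the subrequest to the worker:
 *
 *	static void myfs_create_write_requests(struct netfs_io_request *wreq,
 *					       loff_t start, size_t len)
 *	{
 *		struct netfs_io_subrequest *subreq;
 *		size_t part;
 *
 *		while (len > 0) {
 *			part = min_t(size_t, len, wreq->wsize);
 *			subreq = netfs_create_write_request(
 *				wreq, NETFS_UPLOAD_TO_SERVER, start, part,
 *				myfs_upload_worker);
 *			if (!subreq)
 *				break;
 *			netfs_queue_write_request(subreq);
 *			start += part;
 *			len -= part;
 *		}
 *	}
 */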

/*
 * Process a completed write request once all the component operations have
 * been completed.
 */
static void netfs_write_terminated(struct netfs_io_request *wreq, bool was_async)
{
	struct netfs_io_subrequest *subreq;
	struct netfs_inode *ctx = netfs_inode(wreq->inode);
	size_t transferred = 0;

	_enter("R=%x[]", wreq->debug_id);

	trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);

	/* Count up the contiguously transferred bytes, stopping at the first
	 * errored or short subrequest.
	 */
	list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
		if (subreq->error || subreq->transferred == 0)
			break;
		transferred += subreq->transferred;
		if (subreq->transferred < subreq->len)
			break;
	}
	wreq->transferred = transferred;

	list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
		if (!subreq->error)
			continue;
		switch (subreq->source) {
		case NETFS_UPLOAD_TO_SERVER:
			/* Depending on the type of failure, this may prevent
			 * writeback completion unless we're in disconnected
			 * mode.
			 */
			if (!wreq->error)
				wreq->error = subreq->error;
			break;

		case NETFS_WRITE_TO_CACHE:
			/* Failure doesn't prevent writeback completion unless
			 * we're in disconnected mode.
			 */
			if (subreq->error != -ENOBUFS)
				ctx->ops->invalidate_cache(wreq);
			break;

		default:
			WARN_ON_ONCE(1);
			if (!wreq->error)
				wreq->error = -EIO;
			return;
		}
	}

	wreq->cleanup(wreq);

	if (wreq->origin == NETFS_DIO_WRITE &&
	    wreq->mapping->nrpages) {
		pgoff_t first = wreq->start >> PAGE_SHIFT;
		pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT;
		invalidate_inode_pages2_range(wreq->mapping, first, last);
	}

	if (wreq->origin == NETFS_DIO_WRITE)
		inode_dio_end(wreq->inode);

	_debug("finished");
	trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
	clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
	wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);

	if (wreq->iocb) {
		wreq->iocb->ki_pos += transferred;
		if (wreq->iocb->ki_complete)
			wreq->iocb->ki_complete(
				wreq->iocb, wreq->error ? wreq->error : transferred);
	}

	netfs_clear_subrequests(wreq, was_async);
	netfs_put_request(wreq, was_async, netfs_rreq_trace_put_complete);
}

/*
 * Deal with the completion of a write operation, whether that was an upload
 * to the server or a write to the cache, and update the subrequest and
 * request accordingly.
 */
void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
				       bool was_async)
{
	struct netfs_io_subrequest *subreq = _op;
	struct netfs_io_request *wreq = subreq->rreq;
	unsigned int u;

	_enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);

	switch (subreq->source) {
	case NETFS_UPLOAD_TO_SERVER:
		netfs_stat(&netfs_n_wh_upload_done);
		break;
	case NETFS_WRITE_TO_CACHE:
		netfs_stat(&netfs_n_wh_write_done);
		break;
	case NETFS_INVALID_WRITE:
		break;
	default:
		BUG();
	}

	if (IS_ERR_VALUE(transferred_or_error)) {
		subreq->error = transferred_or_error;
		trace_netfs_failure(wreq, subreq, transferred_or_error,
				    netfs_fail_write);
		goto failed;
	}

	if (WARN(transferred_or_error > subreq->len - subreq->transferred,
		 "Subreq excess write: R%x[%x] %zd > %zu - %zu",
		 wreq->debug_id, subreq->debug_index,
		 transferred_or_error, subreq->len, subreq->transferred))
		transferred_or_error = subreq->len - subreq->transferred;

	subreq->error = 0;
	subreq->transferred += transferred_or_error;

	if (iov_iter_count(&subreq->io_iter) != subreq->len - subreq->transferred)
		pr_warn("R=%08x[%u] ITER POST-MISMATCH %zx != %zx-%zx %x\n",
			wreq->debug_id, subreq->debug_index,
			iov_iter_count(&subreq->io_iter), subreq->len,
			subreq->transferred, subreq->io_iter.iter_type);

	if (subreq->transferred < subreq->len)
		goto incomplete;

	__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
out:
	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);

	/* If we decrement nr_outstanding to 0, the ref belongs to us. */
	u = atomic_dec_return(&wreq->nr_outstanding);
	if (u == 0)
		netfs_write_terminated(wreq, was_async);
	else if (u == 1)
		wake_up_var(&wreq->nr_outstanding);

	netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
	return;

incomplete:
	if (transferred_or_error == 0) {
		if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
			subreq->error = -ENODATA;
			goto failed;
		}
	} else {
		__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
	}

	__set_bit(NETFS_SREQ_SHORT_IO, &subreq->flags);
	set_bit(NETFS_RREQ_INCOMPLETE_IO, &wreq->flags);
	goto out;

failed:
	switch (subreq->source) {
	case NETFS_WRITE_TO_CACHE:
		netfs_stat(&netfs_n_wh_write_failed);
		set_bit(NETFS_RREQ_INCOMPLETE_IO, &wreq->flags);
		break;
	case NETFS_UPLOAD_TO_SERVER:
		netfs_stat(&netfs_n_wh_upload_failed);
		set_bit(NETFS_RREQ_FAILED, &wreq->flags);
		wreq->error = subreq->error;
		break;
	default:
		break;
	}
	goto out;
}
EXPORT_SYMBOL(netfs_write_subrequest_terminated);
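
/* Example: terminating an upload subrequest (an illustrative sketch; the
 * myfs_* names are hypothetical and not part of this file).  A filesystem's
 * worker performs the server write and then reports the outcome to the
 * function above, passing either the byte count or a negative error.  The
 * library drops the worker's ref on the subrequest as part of termination:
 *
 *	static void myfs_upload_worker(struct work_struct *work)
 *	{
 *		struct netfs_io_subrequest *subreq =
 *			container_of(work, struct netfs_io_subrequest, work);
 *		ssize_t ret;
 *
 *		trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
 *		ret = myfs_send_write(subreq);	// hypothetical transport call
 *		netfs_write_subrequest_terminated(subreq,
 *						  ret < 0 ? ret : subreq->len,
 *						  false);
 *	}
 */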

static void netfs_write_to_cache_op(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *wreq = subreq->rreq;
	struct netfs_cache_resources *cres = &wreq->cache_resources;

	trace_netfs_sreq(subreq, netfs_sreq_trace_submit);

	cres->ops->write(cres, subreq->start, &subreq->io_iter,
			 netfs_write_subrequest_terminated, subreq);
}

static void netfs_write_to_cache_op_worker(struct work_struct *work)
{
	struct netfs_io_subrequest *subreq =
		container_of(work, struct netfs_io_subrequest, work);

	netfs_write_to_cache_op(subreq);
}

/**
 * netfs_queue_write_request - Queue a write request for attention
 * @subreq: The write request to be queued
 *
 * Queue the specified write request for processing by a worker thread. We
 * pass the caller's ref on the request to the worker thread.
 */
void netfs_queue_write_request(struct netfs_io_subrequest *subreq)
{
	if (!queue_work(system_unbound_wq, &subreq->work))
		netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_wip);
}
EXPORT_SYMBOL(netfs_queue_write_request);

/*
 * Set up an op for writing to the cache.
 */
static void netfs_set_up_write_to_cache(struct netfs_io_request *wreq)
{
	struct netfs_cache_resources *cres = &wreq->cache_resources;
	struct netfs_io_subrequest *subreq;
	struct netfs_inode *ctx = netfs_inode(wreq->inode);
	struct fscache_cookie *cookie = netfs_i_cookie(ctx);
	loff_t start = wreq->start;
	size_t len = wreq->len;
	int ret;

	if (!fscache_cookie_enabled(cookie)) {
		clear_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags);
		return;
	}

	_debug("write to cache");
	ret = fscache_begin_write_operation(cres, cookie);
	if (ret < 0)
		return;

	/* The cache may adjust the start and length of the write to suit its
	 * granularity.
	 */
	ret = cres->ops->prepare_write(cres, &start, &len, wreq->upper_len,
				       i_size_read(wreq->inode), true);
	if (ret < 0)
		return;

	subreq = netfs_create_write_request(wreq, NETFS_WRITE_TO_CACHE, start, len,
					    netfs_write_to_cache_op_worker);
	if (!subreq)
		return;

	netfs_write_to_cache_op(subreq);
}
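
/* Worked example for the above (assumed numbers, purely illustrative): if
 * wreq covers file range 0x4000-0x6eff, ->prepare_write() might leave start
 * at 0x4000 but round len up from 0x2f00 to 0x3000 to suit the cache's block
 * size (it may expand the range up to wreq->upper_len).  The resulting
 * NETFS_WRITE_TO_CACHE subrequest then copies wreq->io_iter, advances it by
 * start - wreq->start (here 0) and truncates it to 0x3000, so the cache is
 * handed exactly the span it negotiated.
 */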

/*
 * Begin the process of writing out a chunk of data.
 *
 * We are given a write request that holds a series of dirty regions and
 * (partially) covers a sequence of folios, all of which are present.  The
 * pages must have been marked as writeback as appropriate.
 *
 * We need to perform the following steps:
 *
 * (1) If encrypting, create an output buffer and encrypt each block of the
 *     data into it, otherwise the output buffer will point to the original
 *     folios.
 *
 * (2) If the data is to be cached, set up a write op for the entire output
 *     buffer to the cache, if the cache wants to accept it.
 *
 * (3) If the data is to be uploaded (ie. not merely cached):
 *
 *     (a) If the data is to be compressed, create a compression buffer and
 *         compress the data into it.
 *
 *     (b) For each destination we want to upload to, set up write ops to
 *         write to that destination.  We may need multiple writes if the
 *         data is not contiguous or the span exceeds wsize for a server.
 */
int netfs_begin_write(struct netfs_io_request *wreq, bool may_wait,
		      enum netfs_write_trace what)
{
	struct netfs_inode *ctx = netfs_inode(wreq->inode);

	_enter("R=%x %llx-%llx f=%lx",
	       wreq->debug_id, wreq->start, wreq->start + wreq->len - 1,
	       wreq->flags);

	trace_netfs_write(wreq, what);
	if (wreq->len == 0 || wreq->iter.count == 0) {
		pr_err("Zero-sized write [R=%x]\n", wreq->debug_id);
		return -EIO;
	}

	if (wreq->origin == NETFS_DIO_WRITE)
		inode_dio_begin(wreq->inode);

	wreq->io_iter = wreq->iter;

	/* ->nr_outstanding > 0 carries a ref */
	netfs_get_request(wreq, netfs_rreq_trace_get_for_outstanding);
	atomic_set(&wreq->nr_outstanding, 1);

	/* Start the encryption/compression going.  We can do that in the
	 * background whilst we generate a list of write ops that we want to
	 * perform.
	 */
	// TODO: Encrypt or compress the region as appropriate

	/* We need to write all of the region to the cache */
	if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags))
		netfs_set_up_write_to_cache(wreq);

	/* However, we don't necessarily write all of the region to the server.
	 * Caching of reads is being managed this way also.
	 */
	if (test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
		ctx->ops->create_write_requests(wreq, wreq->start, wreq->len);

	if (atomic_dec_and_test(&wreq->nr_outstanding))
		netfs_write_terminated(wreq, false);

	if (!may_wait)
		return -EIOCBQUEUED;

	wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
		    TASK_UNINTERRUPTIBLE);
	return wreq->error;
}
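
/* Example: kicking off a direct write (an illustrative sketch following the
 * pattern of the netfs direct-write path; the surrounding request setup is
 * omitted).  The caller allocates the request, points wreq->iter at the
 * source data and then begins the write, waiting only if the kiocb is
 * synchronous:
 *
 *	ret = netfs_begin_write(wreq, is_sync_kiocb(iocb),
 *				netfs_write_trace_dio_write);
 *
 * A synchronous caller gets back wreq->error (0 on success); an asynchronous
 * caller gets -EIOCBQUEUED and completion is signalled through the iocb's
 * ->ki_complete() from netfs_write_terminated().
 */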
392 */ 393struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len) 394{ 395 struct netfs_io_request *wreq; 396 struct file *file = iocb->ki_filp; 397 398 wreq = netfs_alloc_request(file->f_mapping, file, iocb->ki_pos, len, 399 NETFS_WRITETHROUGH); 400 if (IS_ERR(wreq)) 401 return wreq; 402 403 trace_netfs_write(wreq, netfs_write_trace_writethrough); 404 405 __set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags); 406 iov_iter_xarray(&wreq->iter, ITER_SOURCE, &wreq->mapping->i_pages, wreq->start, 0); 407 wreq->io_iter = wreq->iter; 408 409 /* ->outstanding > 0 carries a ref */ 410 netfs_get_request(wreq, netfs_rreq_trace_get_for_outstanding); 411 atomic_set(&wreq->nr_outstanding, 1); 412 return wreq; 413} 414 415static void netfs_submit_writethrough(struct netfs_io_request *wreq, bool final) 416{ 417 struct netfs_inode *ictx = netfs_inode(wreq->inode); 418 unsigned long long start; 419 size_t len; 420 421 if (!test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags)) 422 return; 423 424 start = wreq->start + wreq->submitted; 425 len = wreq->iter.count - wreq->submitted; 426 if (!final) { 427 len /= wreq->wsize; /* Round to number of maximum packets */ 428 len *= wreq->wsize; 429 } 430 431 ictx->ops->create_write_requests(wreq, start, len); 432 wreq->submitted += len; 433} 434 435/* 436 * Advance the state of the write operation used when writing through the 437 * pagecache. Data has been copied into the pagecache that we need to append 438 * to the request. If we've added more than wsize then we need to create a new 439 * subrequest. 440 */ 441int netfs_advance_writethrough(struct netfs_io_request *wreq, size_t copied, bool to_page_end) 442{ 443 _enter("ic=%zu sb=%zu ws=%u cp=%zu tp=%u", 444 wreq->iter.count, wreq->submitted, wreq->wsize, copied, to_page_end); 445 446 wreq->iter.count += copied; 447 wreq->io_iter.count += copied; 448 if (to_page_end && wreq->io_iter.count - wreq->submitted >= wreq->wsize) 449 netfs_submit_writethrough(wreq, false); 450 451 return wreq->error; 452} 453 454/* 455 * End a write operation used when writing through the pagecache. 456 */ 457int netfs_end_writethrough(struct netfs_io_request *wreq, struct kiocb *iocb) 458{ 459 int ret = -EIOCBQUEUED; 460 461 _enter("ic=%zu sb=%zu ws=%u", 462 wreq->iter.count, wreq->submitted, wreq->wsize); 463 464 if (wreq->submitted < wreq->io_iter.count) 465 netfs_submit_writethrough(wreq, true); 466 467 if (atomic_dec_and_test(&wreq->nr_outstanding)) 468 netfs_write_terminated(wreq, false); 469 470 if (is_sync_kiocb(iocb)) { 471 wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, 472 TASK_UNINTERRUPTIBLE); 473 ret = wreq->error; 474 } 475 476 netfs_put_request(wreq, false, netfs_rreq_trace_put_return); 477 return ret; 478} 479