1/* vi: set sw=4 ts=4: */ 2/* 3 * unix_io.c --- This is the Unix (well, really POSIX) implementation 4 * of the I/O manager. 5 * 6 * Implements a one-block write-through cache. 7 * 8 * Includes support for Windows NT support under Cygwin. 9 * 10 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 11 * 2002 by Theodore Ts'o. 12 * 13 * %Begin-Header% 14 * This file may be redistributed under the terms of the GNU Public 15 * License. 16 * %End-Header% 17 */ 18 19#include <stdio.h> 20#include <string.h> 21#if HAVE_UNISTD_H 22#include <unistd.h> 23#endif 24#if HAVE_ERRNO_H 25#include <errno.h> 26#endif 27#include <fcntl.h> 28#include <time.h> 29#ifdef __linux__ 30#include <sys/utsname.h> 31#endif 32#if HAVE_SYS_STAT_H 33#include <sys/stat.h> 34#endif 35#if HAVE_SYS_TYPES_H 36#include <sys/types.h> 37#endif 38#include <sys/resource.h> 39 40#include "ext2_fs.h" 41#include "ext2fs.h" 42 43/* 44 * For checking structure magic numbers... 45 */ 46 47#define EXT2_CHECK_MAGIC(struct, code) \ 48 if ((struct)->magic != (code)) return (code) 49 50struct unix_cache { 51 char *buf; 52 unsigned long block; 53 int access_time; 54 unsigned dirty:1; 55 unsigned in_use:1; 56}; 57 58#define CACHE_SIZE 8 59#define WRITE_DIRECT_SIZE 4 /* Must be smaller than CACHE_SIZE */ 60#define READ_DIRECT_SIZE 4 /* Should be smaller than CACHE_SIZE */ 61 62struct unix_private_data { 63 int magic; 64 int dev; 65 int flags; 66 int access_time; 67 ext2_loff_t offset; 68 struct unix_cache cache[CACHE_SIZE]; 69}; 70 71static errcode_t unix_open(const char *name, int flags, io_channel *channel); 72static errcode_t unix_close(io_channel channel); 73static errcode_t unix_set_blksize(io_channel channel, int blksize); 74static errcode_t unix_read_blk(io_channel channel, unsigned long block, 75 int count, void *data); 76static errcode_t unix_write_blk(io_channel channel, unsigned long block, 77 int count, const void *data); 78static errcode_t unix_flush(io_channel channel); 79static errcode_t unix_write_byte(io_channel channel, unsigned long offset, 80 int size, const void *data); 81static errcode_t unix_set_option(io_channel channel, const char *option, 82 const char *arg); 83 84static void reuse_cache(io_channel channel, struct unix_private_data *data, 85 struct unix_cache *cache, unsigned long block); 86 87/* __FreeBSD_kernel__ is defined by GNU/kFreeBSD - the FreeBSD kernel 88 * does not know buffered block devices - everything is raw. */ 89#if defined(__CYGWIN__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 90#define NEED_BOUNCE_BUFFER 91#else 92#undef NEED_BOUNCE_BUFFER 93#endif 94 95static struct struct_io_manager struct_unix_manager = { 96 EXT2_ET_MAGIC_IO_MANAGER, 97 "Unix I/O Manager", 98 unix_open, 99 unix_close, 100 unix_set_blksize, 101 unix_read_blk, 102 unix_write_blk, 103 unix_flush, 104#ifdef NEED_BOUNCE_BUFFER 105 0, 106#else 107 unix_write_byte, 108#endif 109 unix_set_option 110}; 111 112io_manager unix_io_manager = &struct_unix_manager; 113 114/* 115 * Here are the raw I/O functions 116 */ 117#ifndef NEED_BOUNCE_BUFFER 118static errcode_t raw_read_blk(io_channel channel, 119 struct unix_private_data *data, 120 unsigned long block, 121 int count, void *buf) 122{ 123 errcode_t retval; 124 ssize_t size; 125 ext2_loff_t location; 126 int actual = 0; 127 128 size = (count < 0) ? -count : count * channel->block_size; 129 location = ((ext2_loff_t) block * channel->block_size) + data->offset; 130 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { 131 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; 132 goto error_out; 133 } 134 actual = read(data->dev, buf, size); 135 if (actual != size) { 136 if (actual < 0) 137 actual = 0; 138 retval = EXT2_ET_SHORT_READ; 139 goto error_out; 140 } 141 return 0; 142 143error_out: 144 memset((char *) buf+actual, 0, size-actual); 145 if (channel->read_error) 146 retval = (channel->read_error)(channel, block, count, buf, 147 size, actual, retval); 148 return retval; 149} 150#else /* NEED_BOUNCE_BUFFER */ 151/* 152 * Windows and FreeBSD block devices only allow sector alignment IO in offset and size 153 */ 154static errcode_t raw_read_blk(io_channel channel, 155 struct unix_private_data *data, 156 unsigned long block, 157 int count, void *buf) 158{ 159 errcode_t retval; 160 size_t size, alignsize, fragment; 161 ext2_loff_t location; 162 int total = 0, actual; 163#define BLOCKALIGN 512 164 char sector[BLOCKALIGN]; 165 166 size = (count < 0) ? -count : count * channel->block_size; 167 location = ((ext2_loff_t) block * channel->block_size) + data->offset; 168#ifdef DEBUG 169 printf("count=%d, size=%d, block=%d, blk_size=%d, location=%lx\n", 170 count, size, block, channel->block_size, location); 171#endif 172 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { 173 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; 174 goto error_out; 175 } 176 fragment = size % BLOCKALIGN; 177 alignsize = size - fragment; 178 if (alignsize) { 179 actual = read(data->dev, buf, alignsize); 180 if (actual != alignsize) 181 goto short_read; 182 } 183 if (fragment) { 184 actual = read(data->dev, sector, BLOCKALIGN); 185 if (actual != BLOCKALIGN) 186 goto short_read; 187 memcpy(buf+alignsize, sector, fragment); 188 } 189 return 0; 190 191short_read: 192 if (actual>0) 193 total += actual; 194 retval = EXT2_ET_SHORT_READ; 195 196error_out: 197 memset((char *) buf+total, 0, size-actual); 198 if (channel->read_error) 199 retval = (channel->read_error)(channel, block, count, buf, 200 size, actual, retval); 201 return retval; 202} 203#endif 204 205static errcode_t raw_write_blk(io_channel channel, 206 struct unix_private_data *data, 207 unsigned long block, 208 int count, const void *buf) 209{ 210 ssize_t size; 211 ext2_loff_t location; 212 int actual = 0; 213 errcode_t retval; 214 215 if (count == 1) 216 size = channel->block_size; 217 else { 218 if (count < 0) 219 size = -count; 220 else 221 size = count * channel->block_size; 222 } 223 224 location = ((ext2_loff_t) block * channel->block_size) + data->offset; 225 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { 226 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; 227 goto error_out; 228 } 229 230 actual = write(data->dev, buf, size); 231 if (actual != size) { 232 retval = EXT2_ET_SHORT_WRITE; 233 goto error_out; 234 } 235 return 0; 236 237error_out: 238 if (channel->write_error) 239 retval = (channel->write_error)(channel, block, count, buf, 240 size, actual, retval); 241 return retval; 242} 243 244 245/* 246 * Here we implement the cache functions 247 */ 248 249/* Allocate the cache buffers */ 250static errcode_t alloc_cache(io_channel channel, 251 struct unix_private_data *data) 252{ 253 errcode_t retval; 254 struct unix_cache *cache; 255 int i; 256 257 data->access_time = 0; 258 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 259 cache->block = 0; 260 cache->access_time = 0; 261 cache->dirty = 0; 262 cache->in_use = 0; 263 if ((retval = ext2fs_get_mem(channel->block_size, 264 &cache->buf))) 265 return retval; 266 } 267 return 0; 268} 269 270/* Free the cache buffers */ 271static void free_cache(struct unix_private_data *data) 272{ 273 struct unix_cache *cache; 274 int i; 275 276 data->access_time = 0; 277 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 278 cache->block = 0; 279 cache->access_time = 0; 280 cache->dirty = 0; 281 cache->in_use = 0; 282 ext2fs_free_mem(&cache->buf); 283 cache->buf = 0; 284 } 285} 286 287#ifndef NO_IO_CACHE 288/* 289 * Try to find a block in the cache. If the block is not found, and 290 * eldest is a non-zero pointer, then fill in eldest with the cache 291 * entry to that should be reused. 292 */ 293static struct unix_cache *find_cached_block(struct unix_private_data *data, 294 unsigned long block, 295 struct unix_cache **eldest) 296{ 297 struct unix_cache *cache, *unused_cache, *oldest_cache; 298 int i; 299 300 unused_cache = oldest_cache = 0; 301 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 302 if (!cache->in_use) { 303 if (!unused_cache) 304 unused_cache = cache; 305 continue; 306 } 307 if (cache->block == block) { 308 cache->access_time = ++data->access_time; 309 return cache; 310 } 311 if (!oldest_cache || 312 (cache->access_time < oldest_cache->access_time)) 313 oldest_cache = cache; 314 } 315 if (eldest) 316 *eldest = (unused_cache) ? unused_cache : oldest_cache; 317 return 0; 318} 319 320/* 321 * Reuse a particular cache entry for another block. 322 */ 323static void reuse_cache(io_channel channel, struct unix_private_data *data, 324 struct unix_cache *cache, unsigned long block) 325{ 326 if (cache->dirty && cache->in_use) 327 raw_write_blk(channel, data, cache->block, 1, cache->buf); 328 329 cache->in_use = 1; 330 cache->dirty = 0; 331 cache->block = block; 332 cache->access_time = ++data->access_time; 333} 334 335/* 336 * Flush all of the blocks in the cache 337 */ 338static errcode_t flush_cached_blocks(io_channel channel, 339 struct unix_private_data *data, 340 int invalidate) 341 342{ 343 struct unix_cache *cache; 344 errcode_t retval, retval2; 345 int i; 346 347 retval2 = 0; 348 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 349 if (!cache->in_use) 350 continue; 351 352 if (invalidate) 353 cache->in_use = 0; 354 355 if (!cache->dirty) 356 continue; 357 358 retval = raw_write_blk(channel, data, 359 cache->block, 1, cache->buf); 360 if (retval) 361 retval2 = retval; 362 else 363 cache->dirty = 0; 364 } 365 return retval2; 366} 367#endif /* NO_IO_CACHE */ 368 369static errcode_t unix_open(const char *name, int flags, io_channel *channel) 370{ 371 io_channel io = NULL; 372 struct unix_private_data *data = NULL; 373 errcode_t retval; 374 int open_flags; 375 struct stat st; 376#ifdef __linux__ 377 struct utsname ut; 378#endif 379 380 if (name == 0) 381 return EXT2_ET_BAD_DEVICE_NAME; 382 retval = ext2fs_get_mem(sizeof(struct struct_io_channel), &io); 383 if (retval) 384 return retval; 385 memset(io, 0, sizeof(struct struct_io_channel)); 386 io->magic = EXT2_ET_MAGIC_IO_CHANNEL; 387 retval = ext2fs_get_mem(sizeof(struct unix_private_data), &data); 388 if (retval) 389 goto cleanup; 390 391 io->manager = unix_io_manager; 392 retval = ext2fs_get_mem(strlen(name)+1, &io->name); 393 if (retval) 394 goto cleanup; 395 396 strcpy(io->name, name); 397 io->private_data = data; 398 io->block_size = 1024; 399 io->read_error = 0; 400 io->write_error = 0; 401 io->refcount = 1; 402 403 memset(data, 0, sizeof(struct unix_private_data)); 404 data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL; 405 406 if ((retval = alloc_cache(io, data))) 407 goto cleanup; 408 409 open_flags = (flags & IO_FLAG_RW) ? O_RDWR : O_RDONLY; 410#ifdef CONFIG_LFS 411 data->dev = open64(io->name, open_flags); 412#else 413 data->dev = open(io->name, open_flags); 414#endif 415 if (data->dev < 0) { 416 retval = errno; 417 goto cleanup; 418 } 419 420#ifdef __linux__ 421#undef RLIM_INFINITY 422#if (defined(__alpha__) || (defined(__sparc__) && (__WORDSIZE == 32)) || (defined(__mips__) && (_MIPS_SZLONG == 32))) 423#define RLIM_INFINITY ((unsigned long)(~0UL>>1)) 424#else 425#define RLIM_INFINITY (~0UL) 426#endif 427 /* 428 * Work around a bug in 2.4.10-2.4.18 kernels where writes to 429 * block devices are wrongly getting hit by the filesize 430 * limit. This workaround isn't perfect, since it won't work 431 * if glibc wasn't built against 2.2 header files. (Sigh.) 432 * 433 */ 434 if ((flags & IO_FLAG_RW) && 435 (uname(&ut) == 0) && 436 ((ut.release[0] == '2') && (ut.release[1] == '.') && 437 (ut.release[2] == '4') && (ut.release[3] == '.') && 438 (ut.release[4] == '1') && (ut.release[5] >= '0') && 439 (ut.release[5] < '8')) && 440 (fstat(data->dev, &st) == 0) && 441 (S_ISBLK(st.st_mode))) { 442 struct rlimit rlim; 443 444 rlim.rlim_cur = rlim.rlim_max = (unsigned long) RLIM_INFINITY; 445 setrlimit(RLIMIT_FSIZE, &rlim); 446 getrlimit(RLIMIT_FSIZE, &rlim); 447 if (((unsigned long) rlim.rlim_cur) < 448 ((unsigned long) rlim.rlim_max)) { 449 rlim.rlim_cur = rlim.rlim_max; 450 setrlimit(RLIMIT_FSIZE, &rlim); 451 } 452 } 453#endif 454 *channel = io; 455 return 0; 456 457cleanup: 458 if (data) { 459 free_cache(data); 460 ext2fs_free_mem(&data); 461 } 462 ext2fs_free_mem(&io); 463 return retval; 464} 465 466static errcode_t unix_close(io_channel channel) 467{ 468 struct unix_private_data *data; 469 errcode_t retval = 0; 470 471 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 472 data = (struct unix_private_data *) channel->private_data; 473 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 474 475 if (--channel->refcount > 0) 476 return 0; 477 478#ifndef NO_IO_CACHE 479 retval = flush_cached_blocks(channel, data, 0); 480#endif 481 482 if (close(data->dev) < 0) 483 retval = errno; 484 free_cache(data); 485 486 ext2fs_free_mem(&channel->private_data); 487 ext2fs_free_mem(&channel->name); 488 ext2fs_free_mem(&channel); 489 return retval; 490} 491 492static errcode_t unix_set_blksize(io_channel channel, int blksize) 493{ 494 struct unix_private_data *data; 495 errcode_t retval; 496 497 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 498 data = (struct unix_private_data *) channel->private_data; 499 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 500 501 if (channel->block_size != blksize) { 502#ifndef NO_IO_CACHE 503 if ((retval = flush_cached_blocks(channel, data, 0))) 504 return retval; 505#endif 506 507 channel->block_size = blksize; 508 free_cache(data); 509 if ((retval = alloc_cache(channel, data))) 510 return retval; 511 } 512 return 0; 513} 514 515 516static errcode_t unix_read_blk(io_channel channel, unsigned long block, 517 int count, void *buf) 518{ 519 struct unix_private_data *data; 520 struct unix_cache *cache, *reuse[READ_DIRECT_SIZE]; 521 errcode_t retval; 522 char *cp; 523 int i, j; 524 525 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 526 data = (struct unix_private_data *) channel->private_data; 527 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 528 529#ifdef NO_IO_CACHE 530 return raw_read_blk(channel, data, block, count, buf); 531#else 532 /* 533 * If we're doing an odd-sized read or a very large read, 534 * flush out the cache and then do a direct read. 535 */ 536 if (count < 0 || count > WRITE_DIRECT_SIZE) { 537 if ((retval = flush_cached_blocks(channel, data, 0))) 538 return retval; 539 return raw_read_blk(channel, data, block, count, buf); 540 } 541 542 cp = buf; 543 while (count > 0) { 544 /* If it's in the cache, use it! */ 545 if ((cache = find_cached_block(data, block, &reuse[0]))) { 546#ifdef DEBUG 547 printf("Using cached block %d\n", block); 548#endif 549 memcpy(cp, cache->buf, channel->block_size); 550 count--; 551 block++; 552 cp += channel->block_size; 553 continue; 554 } 555 /* 556 * Find the number of uncached blocks so we can do a 557 * single read request 558 */ 559 for (i=1; i < count; i++) 560 if (find_cached_block(data, block+i, &reuse[i])) 561 break; 562#ifdef DEBUG 563 printf("Reading %d blocks starting at %d\n", i, block); 564#endif 565 if ((retval = raw_read_blk(channel, data, block, i, cp))) 566 return retval; 567 568 /* Save the results in the cache */ 569 for (j=0; j < i; j++) { 570 count--; 571 cache = reuse[j]; 572 reuse_cache(channel, data, cache, block++); 573 memcpy(cache->buf, cp, channel->block_size); 574 cp += channel->block_size; 575 } 576 } 577 return 0; 578#endif /* NO_IO_CACHE */ 579} 580 581static errcode_t unix_write_blk(io_channel channel, unsigned long block, 582 int count, const void *buf) 583{ 584 struct unix_private_data *data; 585 struct unix_cache *cache, *reuse; 586 errcode_t retval = 0; 587 const char *cp; 588 int writethrough; 589 590 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 591 data = (struct unix_private_data *) channel->private_data; 592 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 593 594#ifdef NO_IO_CACHE 595 return raw_write_blk(channel, data, block, count, buf); 596#else 597 /* 598 * If we're doing an odd-sized write or a very large write, 599 * flush out the cache completely and then do a direct write. 600 */ 601 if (count < 0 || count > WRITE_DIRECT_SIZE) { 602 if ((retval = flush_cached_blocks(channel, data, 1))) 603 return retval; 604 return raw_write_blk(channel, data, block, count, buf); 605 } 606 607 /* 608 * For a moderate-sized multi-block write, first force a write 609 * if we're in write-through cache mode, and then fill the 610 * cache with the blocks. 611 */ 612 writethrough = channel->flags & CHANNEL_FLAGS_WRITETHROUGH; 613 if (writethrough) 614 retval = raw_write_blk(channel, data, block, count, buf); 615 616 cp = buf; 617 while (count > 0) { 618 cache = find_cached_block(data, block, &reuse); 619 if (!cache) { 620 cache = reuse; 621 reuse_cache(channel, data, cache, block); 622 } 623 memcpy(cache->buf, cp, channel->block_size); 624 cache->dirty = !writethrough; 625 count--; 626 block++; 627 cp += channel->block_size; 628 } 629 return retval; 630#endif /* NO_IO_CACHE */ 631} 632 633static errcode_t unix_write_byte(io_channel channel, unsigned long offset, 634 int size, const void *buf) 635{ 636 struct unix_private_data *data; 637 errcode_t retval = 0; 638 ssize_t actual; 639 640 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 641 data = (struct unix_private_data *) channel->private_data; 642 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 643 644#ifndef NO_IO_CACHE 645 /* 646 * Flush out the cache completely 647 */ 648 if ((retval = flush_cached_blocks(channel, data, 1))) 649 return retval; 650#endif 651 652 if (lseek(data->dev, offset + data->offset, SEEK_SET) < 0) 653 return errno; 654 655 actual = write(data->dev, buf, size); 656 if (actual != size) 657 return EXT2_ET_SHORT_WRITE; 658 659 return 0; 660} 661 662/* 663 * Flush data buffers to disk. 664 */ 665static errcode_t unix_flush(io_channel channel) 666{ 667 struct unix_private_data *data; 668 errcode_t retval = 0; 669 670 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 671 data = (struct unix_private_data *) channel->private_data; 672 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 673 674#ifndef NO_IO_CACHE 675 retval = flush_cached_blocks(channel, data, 0); 676#endif 677 fsync(data->dev); 678 return retval; 679} 680 681static errcode_t unix_set_option(io_channel channel, const char *option, 682 const char *arg) 683{ 684 struct unix_private_data *data; 685 unsigned long tmp; 686 char *end; 687 688 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 689 data = (struct unix_private_data *) channel->private_data; 690 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 691 692 if (!strcmp(option, "offset")) { 693 if (!arg) 694 return EXT2_ET_INVALID_ARGUMENT; 695 696 tmp = strtoul(arg, &end, 0); 697 if (*end) 698 return EXT2_ET_INVALID_ARGUMENT; 699 data->offset = tmp; 700 return 0; 701 } 702 return EXT2_ET_INVALID_ARGUMENT; 703} 704