1/* vi: set sw=4 ts=4: */ 2/* 3 * unix_io.c --- This is the Unix (well, really POSIX) implementation 4 * of the I/O manager. 5 * 6 * Implements a one-block write-through cache. 7 * 8 * Includes support for Windows NT support under Cygwin. 9 * 10 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 11 * 2002 by Theodore Ts'o. 12 * 13 * %Begin-Header% 14 * This file may be redistributed under the terms of the GNU Public 15 * License. 16 * %End-Header% 17 */ 18 19#include <stdio.h> 20#include <string.h> 21#if HAVE_UNISTD_H 22#include <unistd.h> 23#endif 24#if HAVE_ERRNO_H 25#include <errno.h> 26#endif 27#include <fcntl.h> 28#include <time.h> 29#ifdef __linux__ 30#include <sys/utsname.h> 31#endif 32#if HAVE_SYS_STAT_H 33#include <sys/stat.h> 34#endif 35#if HAVE_SYS_TYPES_H 36#include <sys/types.h> 37#endif 38#include <sys/resource.h> 39 40#include "ext2_fs.h" 41#include "ext2fs.h" 42 43/* 44 * For checking structure magic numbers... 45 */ 46 47#define EXT2_CHECK_MAGIC(struct, code) \ 48 if ((struct)->magic != (code)) return (code) 49 50struct unix_cache { 51 char *buf; 52 unsigned long block; 53 int access_time; 54 unsigned dirty:1; 55 unsigned in_use:1; 56}; 57 58#define CACHE_SIZE 8 59#define WRITE_DIRECT_SIZE 4 /* Must be smaller than CACHE_SIZE */ 60#define READ_DIRECT_SIZE 4 /* Should be smaller than CACHE_SIZE */ 61 62struct unix_private_data { 63 int magic; 64 int dev; 65 int flags; 66 int access_time; 67 ext2_loff_t offset; 68 struct unix_cache cache[CACHE_SIZE]; 69}; 70 71static errcode_t unix_open(const char *name, int flags, io_channel *channel); 72static errcode_t unix_close(io_channel channel); 73static errcode_t unix_set_blksize(io_channel channel, int blksize); 74static errcode_t unix_read_blk(io_channel channel, unsigned long block, 75 int count, void *data); 76static errcode_t unix_write_blk(io_channel channel, unsigned long block, 77 int count, const void *data); 78static errcode_t unix_flush(io_channel channel); 79static errcode_t unix_write_byte(io_channel channel, unsigned long offset, 80 int size, const void *data); 81static errcode_t unix_set_option(io_channel channel, const char *option, 82 const char *arg); 83 84static void reuse_cache(io_channel channel, struct unix_private_data *data, 85 struct unix_cache *cache, unsigned long block); 86 87/* __FreeBSD_kernel__ is defined by GNU/kFreeBSD - the FreeBSD kernel 88 * does not know buffered block devices - everything is raw. */ 89#if defined(__CYGWIN__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 90#define NEED_BOUNCE_BUFFER 91#else 92#undef NEED_BOUNCE_BUFFER 93#endif 94 95static struct struct_io_manager struct_unix_manager = { 96 EXT2_ET_MAGIC_IO_MANAGER, 97 "Unix I/O Manager", 98 unix_open, 99 unix_close, 100 unix_set_blksize, 101 unix_read_blk, 102 unix_write_blk, 103 unix_flush, 104#ifdef NEED_BOUNCE_BUFFER 105 0, 106#else 107 unix_write_byte, 108#endif 109 unix_set_option 110}; 111 112io_manager unix_io_manager = &struct_unix_manager; 113 114/* 115 * Here are the raw I/O functions 116 */ 117#ifndef NEED_BOUNCE_BUFFER 118static errcode_t raw_read_blk(io_channel channel, 119 struct unix_private_data *data, 120 unsigned long block, 121 int count, void *buf) 122{ 123 errcode_t retval; 124 ssize_t size; 125 ext2_loff_t location; 126 int actual = 0; 127 128 size = (count < 0) ? -count : count * channel->block_size; 129 location = ((ext2_loff_t) block * channel->block_size) + data->offset; 130 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { 131 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; 132 goto error_out; 133 } 134 actual = read(data->dev, buf, size); 135 if (actual != size) { 136 if (actual < 0) 137 actual = 0; 138 retval = EXT2_ET_SHORT_READ; 139 goto error_out; 140 } 141 return 0; 142 143error_out: 144 memset((char *) buf+actual, 0, size-actual); 145 if (channel->read_error) 146 retval = (channel->read_error)(channel, block, count, buf, 147 size, actual, retval); 148 return retval; 149} 150#else /* NEED_BOUNCE_BUFFER */ 151/* 152 * Windows and FreeBSD block devices only allow sector alignment IO in offset and size 153 */ 154static errcode_t raw_read_blk(io_channel channel, 155 struct unix_private_data *data, 156 unsigned long block, 157 int count, void *buf) 158{ 159 errcode_t retval; 160 size_t size, alignsize, fragment; 161 ext2_loff_t location; 162 int total = 0, actual; 163#define BLOCKALIGN 512 164 char sector[BLOCKALIGN]; 165 166 size = (count < 0) ? -count : count * channel->block_size; 167 location = ((ext2_loff_t) block * channel->block_size) + data->offset; 168#ifdef DEBUG 169 printf("count=%d, size=%d, block=%d, blk_size=%d, location=%lx\n", 170 count, size, block, channel->block_size, location); 171#endif 172 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { 173 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; 174 goto error_out; 175 } 176 fragment = size % BLOCKALIGN; 177 alignsize = size - fragment; 178 if (alignsize) { 179 actual = read(data->dev, buf, alignsize); 180 if (actual != alignsize) 181 goto short_read; 182 } 183 if (fragment) { 184 actual = read(data->dev, sector, BLOCKALIGN); 185 if (actual != BLOCKALIGN) 186 goto short_read; 187 memcpy(buf+alignsize, sector, fragment); 188 } 189 return 0; 190 191short_read: 192 if (actual>0) 193 total += actual; 194 retval = EXT2_ET_SHORT_READ; 195 196error_out: 197 memset((char *) buf+total, 0, size-actual); 198 if (channel->read_error) 199 retval = (channel->read_error)(channel, block, count, buf, 200 size, actual, retval); 201 return retval; 202} 203#endif 204 205static errcode_t raw_write_blk(io_channel channel, 206 struct unix_private_data *data, 207 unsigned long block, 208 int count, const void *buf) 209{ 210 ssize_t size; 211 ext2_loff_t location; 212 int actual = 0; 213 errcode_t retval; 214 215 if (count == 1) 216 size = channel->block_size; 217 else { 218 if (count < 0) 219 size = -count; 220 else 221 size = count * channel->block_size; 222 } 223 224 location = ((ext2_loff_t) block * channel->block_size) + data->offset; 225 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { 226 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; 227 goto error_out; 228 } 229 230 actual = write(data->dev, buf, size); 231 if (actual != size) { 232 retval = EXT2_ET_SHORT_WRITE; 233 goto error_out; 234 } 235 return 0; 236 237error_out: 238 if (channel->write_error) 239 retval = (channel->write_error)(channel, block, count, buf, 240 size, actual, retval); 241 return retval; 242} 243 244 245/* 246 * Here we implement the cache functions 247 */ 248 249/* Allocate the cache buffers */ 250static errcode_t alloc_cache(io_channel channel, 251 struct unix_private_data *data) 252{ 253 errcode_t retval; 254 struct unix_cache *cache; 255 int i; 256 257 data->access_time = 0; 258 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 259 cache->block = 0; 260 cache->access_time = 0; 261 cache->dirty = 0; 262 cache->in_use = 0; 263 if ((retval = ext2fs_get_mem(channel->block_size, 264 &cache->buf))) 265 return retval; 266 } 267 return 0; 268} 269 270/* Free the cache buffers */ 271static void free_cache(struct unix_private_data *data) 272{ 273 struct unix_cache *cache; 274 int i; 275 276 data->access_time = 0; 277 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 278 cache->block = 0; 279 cache->access_time = 0; 280 cache->dirty = 0; 281 cache->in_use = 0; 282 ext2fs_free_mem(&cache->buf); 283 cache->buf = 0; 284 } 285} 286 287#ifndef NO_IO_CACHE 288/* 289 * Try to find a block in the cache. If the block is not found, and 290 * eldest is a non-zero pointer, then fill in eldest with the cache 291 * entry to that should be reused. 292 */ 293static struct unix_cache *find_cached_block(struct unix_private_data *data, 294 unsigned long block, 295 struct unix_cache **eldest) 296{ 297 struct unix_cache *cache, *unused_cache, *oldest_cache; 298 int i; 299 300 unused_cache = oldest_cache = 0; 301 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 302 if (!cache->in_use) { 303 if (!unused_cache) 304 unused_cache = cache; 305 continue; 306 } 307 if (cache->block == block) { 308 cache->access_time = ++data->access_time; 309 return cache; 310 } 311 if (!oldest_cache || 312 (cache->access_time < oldest_cache->access_time)) 313 oldest_cache = cache; 314 } 315 if (eldest) 316 *eldest = (unused_cache) ? unused_cache : oldest_cache; 317 return 0; 318} 319 320/* 321 * Reuse a particular cache entry for another block. 322 */ 323static void reuse_cache(io_channel channel, struct unix_private_data *data, 324 struct unix_cache *cache, unsigned long block) 325{ 326 if (cache->dirty && cache->in_use) 327 raw_write_blk(channel, data, cache->block, 1, cache->buf); 328 329 cache->in_use = 1; 330 cache->dirty = 0; 331 cache->block = block; 332 cache->access_time = ++data->access_time; 333} 334 335/* 336 * Flush all of the blocks in the cache 337 */ 338static errcode_t flush_cached_blocks(io_channel channel, 339 struct unix_private_data *data, 340 int invalidate) 341 342{ 343 struct unix_cache *cache; 344 errcode_t retval, retval2; 345 int i; 346 347 retval2 = 0; 348 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 349 if (!cache->in_use) 350 continue; 351 352 if (invalidate) 353 cache->in_use = 0; 354 355 if (!cache->dirty) 356 continue; 357 358 retval = raw_write_blk(channel, data, 359 cache->block, 1, cache->buf); 360 if (retval) 361 retval2 = retval; 362 else 363 cache->dirty = 0; 364 } 365 return retval2; 366} 367#endif /* NO_IO_CACHE */ 368 369static errcode_t unix_open(const char *name, int flags, io_channel *channel) 370{ 371 io_channel io = NULL; 372 struct unix_private_data *data = NULL; 373 errcode_t retval; 374 int open_flags; 375 struct stat st; 376#ifdef __linux__ 377 struct utsname ut; 378#endif 379 380 if (name == 0) 381 return EXT2_ET_BAD_DEVICE_NAME; 382 retval = ext2fs_get_mem(sizeof(struct struct_io_channel), &io); 383 if (retval) 384 return retval; 385 memset(io, 0, sizeof(struct struct_io_channel)); 386 io->magic = EXT2_ET_MAGIC_IO_CHANNEL; 387 retval = ext2fs_get_mem(sizeof(struct unix_private_data), &data); 388 if (retval) 389 goto cleanup; 390 391 io->manager = unix_io_manager; 392 retval = ext2fs_get_mem(strlen(name)+1, &io->name); 393 if (retval) 394 goto cleanup; 395 396 strcpy(io->name, name); 397 io->private_data = data; 398 io->block_size = 1024; 399 io->read_error = 0; 400 io->write_error = 0; 401 io->refcount = 1; 402 403 memset(data, 0, sizeof(struct unix_private_data)); 404 data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL; 405 406 if ((retval = alloc_cache(io, data))) 407 goto cleanup; 408 409 open_flags = (flags & IO_FLAG_RW) ? O_RDWR : O_RDONLY; 410#ifdef CONFIG_LFS 411 data->dev = open64(io->name, open_flags); 412#else 413 data->dev = open(io->name, open_flags); 414#endif 415 if (data->dev < 0) { 416 retval = errno; 417 goto cleanup; 418 } 419 420#ifdef __linux__ 421#undef RLIM_INFINITY 422#if (defined(__alpha__) || ((defined(__sparc__) || defined(__mips__)) && (SIZEOF_LONG \ 423 == 4))) 424#define RLIM_INFINITY ((unsigned long)(~0UL>>1)) 425#else 426#define RLIM_INFINITY (~0UL) 427#endif 428 if ((flags & IO_FLAG_RW) && 429 (uname(&ut) == 0) && 430 ((ut.release[0] == '2') && (ut.release[1] == '.') && 431 (ut.release[2] == '4') && (ut.release[3] == '.') && 432 (ut.release[4] == '1') && (ut.release[5] >= '0') && 433 (ut.release[5] < '8')) && 434 (fstat(data->dev, &st) == 0) && 435 (S_ISBLK(st.st_mode))) { 436 struct rlimit rlim; 437 438 rlim.rlim_cur = rlim.rlim_max = (unsigned long) RLIM_INFINITY; 439 setrlimit(RLIMIT_FSIZE, &rlim); 440 getrlimit(RLIMIT_FSIZE, &rlim); 441 if (((unsigned long) rlim.rlim_cur) < 442 ((unsigned long) rlim.rlim_max)) { 443 rlim.rlim_cur = rlim.rlim_max; 444 setrlimit(RLIMIT_FSIZE, &rlim); 445 } 446 } 447#endif 448 *channel = io; 449 return 0; 450 451cleanup: 452 if (data) { 453 free_cache(data); 454 ext2fs_free_mem(&data); 455 } 456 ext2fs_free_mem(&io); 457 return retval; 458} 459 460static errcode_t unix_close(io_channel channel) 461{ 462 struct unix_private_data *data; 463 errcode_t retval = 0; 464 465 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 466 data = (struct unix_private_data *) channel->private_data; 467 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 468 469 if (--channel->refcount > 0) 470 return 0; 471 472#ifndef NO_IO_CACHE 473 retval = flush_cached_blocks(channel, data, 0); 474#endif 475 476 if (close(data->dev) < 0) 477 retval = errno; 478 free_cache(data); 479 480 ext2fs_free_mem(&channel->private_data); 481 ext2fs_free_mem(&channel->name); 482 ext2fs_free_mem(&channel); 483 return retval; 484} 485 486static errcode_t unix_set_blksize(io_channel channel, int blksize) 487{ 488 struct unix_private_data *data; 489 errcode_t retval; 490 491 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 492 data = (struct unix_private_data *) channel->private_data; 493 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 494 495 if (channel->block_size != blksize) { 496#ifndef NO_IO_CACHE 497 if ((retval = flush_cached_blocks(channel, data, 0))) 498 return retval; 499#endif 500 501 channel->block_size = blksize; 502 free_cache(data); 503 if ((retval = alloc_cache(channel, data))) 504 return retval; 505 } 506 return 0; 507} 508 509 510static errcode_t unix_read_blk(io_channel channel, unsigned long block, 511 int count, void *buf) 512{ 513 struct unix_private_data *data; 514 struct unix_cache *cache, *reuse[READ_DIRECT_SIZE]; 515 errcode_t retval; 516 char *cp; 517 int i, j; 518 519 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 520 data = (struct unix_private_data *) channel->private_data; 521 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 522 523#ifdef NO_IO_CACHE 524 return raw_read_blk(channel, data, block, count, buf); 525#else 526 /* 527 * If we're doing an odd-sized read or a very large read, 528 * flush out the cache and then do a direct read. 529 */ 530 if (count < 0 || count > WRITE_DIRECT_SIZE) { 531 if ((retval = flush_cached_blocks(channel, data, 0))) 532 return retval; 533 return raw_read_blk(channel, data, block, count, buf); 534 } 535 536 cp = buf; 537 while (count > 0) { 538 /* If it's in the cache, use it! */ 539 if ((cache = find_cached_block(data, block, &reuse[0]))) { 540#ifdef DEBUG 541 printf("Using cached block %d\n", block); 542#endif 543 memcpy(cp, cache->buf, channel->block_size); 544 count--; 545 block++; 546 cp += channel->block_size; 547 continue; 548 } 549 /* 550 * Find the number of uncached blocks so we can do a 551 * single read request 552 */ 553 for (i=1; i < count; i++) 554 if (find_cached_block(data, block+i, &reuse[i])) 555 break; 556#ifdef DEBUG 557 printf("Reading %d blocks starting at %d\n", i, block); 558#endif 559 if ((retval = raw_read_blk(channel, data, block, i, cp))) 560 return retval; 561 562 /* Save the results in the cache */ 563 for (j=0; j < i; j++) { 564 count--; 565 cache = reuse[j]; 566 reuse_cache(channel, data, cache, block++); 567 memcpy(cache->buf, cp, channel->block_size); 568 cp += channel->block_size; 569 } 570 } 571 return 0; 572#endif /* NO_IO_CACHE */ 573} 574 575static errcode_t unix_write_blk(io_channel channel, unsigned long block, 576 int count, const void *buf) 577{ 578 struct unix_private_data *data; 579 struct unix_cache *cache, *reuse; 580 errcode_t retval = 0; 581 const char *cp; 582 int writethrough; 583 584 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 585 data = (struct unix_private_data *) channel->private_data; 586 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 587 588#ifdef NO_IO_CACHE 589 return raw_write_blk(channel, data, block, count, buf); 590#else 591 /* 592 * If we're doing an odd-sized write or a very large write, 593 * flush out the cache completely and then do a direct write. 594 */ 595 if (count < 0 || count > WRITE_DIRECT_SIZE) { 596 if ((retval = flush_cached_blocks(channel, data, 1))) 597 return retval; 598 return raw_write_blk(channel, data, block, count, buf); 599 } 600 601 /* 602 * For a moderate-sized multi-block write, first force a write 603 * if we're in write-through cache mode, and then fill the 604 * cache with the blocks. 605 */ 606 writethrough = channel->flags & CHANNEL_FLAGS_WRITETHROUGH; 607 if (writethrough) 608 retval = raw_write_blk(channel, data, block, count, buf); 609 610 cp = buf; 611 while (count > 0) { 612 cache = find_cached_block(data, block, &reuse); 613 if (!cache) { 614 cache = reuse; 615 reuse_cache(channel, data, cache, block); 616 } 617 memcpy(cache->buf, cp, channel->block_size); 618 cache->dirty = !writethrough; 619 count--; 620 block++; 621 cp += channel->block_size; 622 } 623 return retval; 624#endif /* NO_IO_CACHE */ 625} 626 627static errcode_t unix_write_byte(io_channel channel, unsigned long offset, 628 int size, const void *buf) 629{ 630 struct unix_private_data *data; 631 errcode_t retval = 0; 632 ssize_t actual; 633 634 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 635 data = (struct unix_private_data *) channel->private_data; 636 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 637 638#ifndef NO_IO_CACHE 639 /* 640 * Flush out the cache completely 641 */ 642 if ((retval = flush_cached_blocks(channel, data, 1))) 643 return retval; 644#endif 645 646 if (lseek(data->dev, offset + data->offset, SEEK_SET) < 0) 647 return errno; 648 649 actual = write(data->dev, buf, size); 650 if (actual != size) 651 return EXT2_ET_SHORT_WRITE; 652 653 return 0; 654} 655 656/* 657 * Flush data buffers to disk. 658 */ 659static errcode_t unix_flush(io_channel channel) 660{ 661 struct unix_private_data *data; 662 errcode_t retval = 0; 663 664 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 665 data = (struct unix_private_data *) channel->private_data; 666 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 667 668#ifndef NO_IO_CACHE 669 retval = flush_cached_blocks(channel, data, 0); 670#endif 671 fsync(data->dev); 672 return retval; 673} 674 675static errcode_t unix_set_option(io_channel channel, const char *option, 676 const char *arg) 677{ 678 struct unix_private_data *data; 679 unsigned long tmp; 680 char *end; 681 682 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 683 data = (struct unix_private_data *) channel->private_data; 684 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 685 686 if (!strcmp(option, "offset")) { 687 if (!arg) 688 return EXT2_ET_INVALID_ARGUMENT; 689 690 tmp = strtoul(arg, &end, 0); 691 if (*end) 692 return EXT2_ET_INVALID_ARGUMENT; 693 data->offset = tmp; 694 return 0; 695 } 696 return EXT2_ET_INVALID_ARGUMENT; 697} 698