1/*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 1997,2008 Oracle. All rights reserved. 5 * 6 * $Id: zerofill.c,v 12.29 2008/03/12 19:13:07 mbrey Exp $ 7 */ 8 9#include "db_config.h" 10 11#include "db_int.h" 12 13/* 14 * __db_zero_fill -- 15 * Zero out bytes in the file. 16 * 17 * Pages allocated by writing pages past end-of-file are not zeroed, 18 * on some systems. Recovery could theoretically be fooled by a page 19 * showing up that contained garbage. In order to avoid this, we 20 * have to write the pages out to disk, and flush them. The reason 21 * for the flush is because if we don't sync, the allocation of another 22 * page subsequent to this one might reach the disk first, and if we 23 * crashed at the right moment, leave us with this page as the one 24 * allocated by writing a page past it in the file. 25 * 26 * PUBLIC: int __db_zero_fill __P((ENV *, DB_FH *)); 27 */ 28int 29__db_zero_fill(env, fhp) 30 ENV *env; 31 DB_FH *fhp; 32{ 33#ifdef HAVE_FILESYSTEM_NOTZERO 34 off_t stat_offset, write_offset; 35 size_t blen, nw; 36 u_int32_t bytes, mbytes; 37 int group_sync, ret; 38 u_int8_t *bp; 39 40 /* Calculate the byte offset of the next write. */ 41 write_offset = (off_t)fhp->pgno * fhp->pgsize + fhp->offset; 42 43 /* Stat the file. */ 44 if ((ret = __os_ioinfo(env, NULL, fhp, &mbytes, &bytes, NULL)) != 0) 45 return (ret); 46 stat_offset = (off_t)mbytes * MEGABYTE + bytes; 47 48 /* Check if the file is large enough. */ 49 if (stat_offset >= write_offset) 50 return (0); 51 52 /* Get a large buffer if we're writing lots of data. */ 53#undef ZF_LARGE_WRITE 54#define ZF_LARGE_WRITE (64 * 1024) 55 if ((ret = __os_calloc(env, 1, ZF_LARGE_WRITE, &bp)) != 0) 56 return (ret); 57 blen = ZF_LARGE_WRITE; 58 59 /* Seek to the current end of the file. */ 60 if ((ret = __os_seek(env, fhp, mbytes, MEGABYTE, bytes)) != 0) 61 goto err; 62 63 /* 64 * Hash is the only access method that allocates groups of pages. Hash 65 * uses the existence of the last page in a group to signify the entire 66 * group is OK; so, write all the pages but the last one in the group, 67 * flush them to disk, then write the last one to disk and flush it. 68 */ 69 for (group_sync = 0; stat_offset < write_offset; group_sync = 1) { 70 if (write_offset - stat_offset <= (off_t)blen) { 71 blen = (size_t)(write_offset - stat_offset); 72 if (group_sync && (ret = __os_fsync(env, fhp)) != 0) 73 goto err; 74 } 75 if ((ret = __os_physwrite(env, fhp, bp, blen, &nw)) != 0) 76 goto err; 77 stat_offset += blen; 78 } 79 if ((ret = __os_fsync(env, fhp)) != 0) 80 goto err; 81 82 /* Seek back to where we started. */ 83 mbytes = (u_int32_t)(write_offset / MEGABYTE); 84 bytes = (u_int32_t)(write_offset % MEGABYTE); 85 ret = __os_seek(env, fhp, mbytes, MEGABYTE, bytes); 86 87err: __os_free(env, bp); 88 return (ret); 89#else 90 COMPQUIET(env, NULL); 91 COMPQUIET(fhp, NULL); 92 return (0); 93#endif /* HAVE_FILESYSTEM_NOTZERO */ 94} 95 96/* 97 * __db_zero -- 98 * Zero to the end of the file. 99 * 100 * PUBLIC: int __db_zero_extend __P((ENV *, 101 * PUBLIC: DB_FH *, db_pgno_t, db_pgno_t, u_int32_t)); 102 */ 103int 104__db_zero_extend(env, fhp, pgno, last_pgno, pgsize) 105 ENV *env; 106 DB_FH *fhp; 107 db_pgno_t pgno, last_pgno; 108 u_int32_t pgsize; 109{ 110 int ret; 111 size_t nwrote; 112 u_int8_t *buf; 113 114 if ((ret = __os_calloc(env, 1, pgsize, &buf)) != 0) 115 return (ret); 116 memset(buf, 0, pgsize); 117 for (; pgno <= last_pgno; pgno++) 118 if ((ret = __os_io(env, DB_IO_WRITE, 119 fhp, pgno, pgsize, 0, pgsize, buf, &nwrote)) != 0) { 120 if (ret == 0) { 121 ret = EIO; 122 goto err; 123 } 124 goto err; 125 } 126 127err: __os_free(env, buf); 128 return (ret); 129} 130