1/*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 1997,2008 Oracle. All rights reserved. 5 * 6 * $Id: os_rw.c,v 12.32 2008/02/12 16:08:51 bostic Exp $ 7 */ 8 9#include "db_config.h" 10 11#include "db_int.h" 12 13/* 14 * __os_io -- 15 * Do an I/O. 16 * 17 * PUBLIC: int __os_io __P((ENV *, int, DB_FH *, db_pgno_t, 18 * PUBLIC: u_int32_t, u_int32_t, u_int32_t, u_int8_t *, size_t *)); 19 */ 20int 21__os_io(env, op, fhp, pgno, pgsize, relative, io_len, buf, niop) 22 ENV *env; 23 int op; 24 DB_FH *fhp; 25 db_pgno_t pgno; 26 u_int32_t pgsize, relative, io_len; 27 u_int8_t *buf; 28 size_t *niop; 29{ 30#if defined(HAVE_PREAD) && defined(HAVE_PWRITE) 31 DB_ENV *dbenv; 32 off_t offset; 33 ssize_t nio; 34#endif 35 int ret; 36 37 /* 38 * Check for illegal usage. 39 * 40 * This routine is used in one of two ways: reading bytes from an 41 * absolute offset and reading a specific database page. All of 42 * our absolute offsets are known to fit into a u_int32_t, while 43 * our database pages might be at offsets larger than a u_int32_t. 44 * We don't want to specify an absolute offset in our caller as we 45 * aren't exactly sure what size an off_t might be. 46 */ 47 DB_ASSERT(env, F_ISSET(fhp, DB_FH_OPENED) && fhp->fd != -1); 48 DB_ASSERT(env, (pgno == 0 && pgsize == 0) || relative == 0); 49 50#if defined(HAVE_PREAD) && defined(HAVE_PWRITE) 51 dbenv = env == NULL ? NULL : env->dbenv; 52 53 if ((offset = relative) == 0) 54 offset = (off_t)pgno * pgsize; 55 switch (op) { 56 case DB_IO_READ: 57 if (DB_GLOBAL(j_read) != NULL) 58 goto slow; 59#if defined(HAVE_STATISTICS) 60 ++fhp->read_count; 61#endif 62 if (dbenv != NULL && 63 FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS_ALL)) 64 __db_msg(env, 65 "fileops: read %s: %lu bytes at offset %lu", 66 fhp->name, (u_long)io_len, (u_long)offset); 67 68 LAST_PANIC_CHECK_BEFORE_IO(env); 69 nio = DB_GLOBAL(j_pread) != NULL ? 70 DB_GLOBAL(j_pread)(fhp->fd, buf, io_len, offset) : 71 pread(fhp->fd, buf, io_len, offset); 72 break; 73 case DB_IO_WRITE: 74 if (DB_GLOBAL(j_write) != NULL) 75 goto slow; 76#ifdef HAVE_FILESYSTEM_NOTZERO 77 if (__os_fs_notzero()) 78 goto slow; 79#endif 80#if defined(HAVE_STATISTICS) 81 ++fhp->write_count; 82#endif 83 if (dbenv != NULL && 84 FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS_ALL)) 85 __db_msg(env, 86 "fileops: write %s: %lu bytes at offset %lu", 87 fhp->name, (u_long)io_len, (u_long)offset); 88 89 LAST_PANIC_CHECK_BEFORE_IO(env); 90 nio = DB_GLOBAL(j_pwrite) != NULL ? 91 DB_GLOBAL(j_pwrite)(fhp->fd, buf, io_len, offset) : 92 pwrite(fhp->fd, buf, io_len, offset); 93 break; 94 default: 95 return (EINVAL); 96 } 97 if (nio == (ssize_t)io_len) { 98 *niop = io_len; 99 return (0); 100 } 101slow: 102#endif 103 MUTEX_LOCK(env, fhp->mtx_fh); 104 105 if ((ret = __os_seek(env, fhp, pgno, pgsize, relative)) != 0) 106 goto err; 107 switch (op) { 108 case DB_IO_READ: 109 ret = __os_read(env, fhp, buf, io_len, niop); 110 break; 111 case DB_IO_WRITE: 112 ret = __os_write(env, fhp, buf, io_len, niop); 113 break; 114 default: 115 ret = EINVAL; 116 break; 117 } 118 119err: MUTEX_UNLOCK(env, fhp->mtx_fh); 120 121 return (ret); 122 123} 124 125/* 126 * __os_read -- 127 * Read from a file handle. 128 * 129 * PUBLIC: int __os_read __P((ENV *, DB_FH *, void *, size_t, size_t *)); 130 */ 131int 132__os_read(env, fhp, addr, len, nrp) 133 ENV *env; 134 DB_FH *fhp; 135 void *addr; 136 size_t len; 137 size_t *nrp; 138{ 139 DB_ENV *dbenv; 140 size_t offset; 141 ssize_t nr; 142 int ret; 143 u_int8_t *taddr; 144 145 dbenv = env == NULL ? NULL : env->dbenv; 146 ret = 0; 147 148 DB_ASSERT(env, F_ISSET(fhp, DB_FH_OPENED) && fhp->fd != -1); 149 150#if defined(HAVE_STATISTICS) 151 ++fhp->read_count; 152#endif 153 if (dbenv != NULL && FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS_ALL)) 154 __db_msg(env, 155 "fileops: read %s: %lu bytes", fhp->name, (u_long)len); 156 157 if (DB_GLOBAL(j_read) != NULL) { 158 *nrp = len; 159 LAST_PANIC_CHECK_BEFORE_IO(env); 160 if (DB_GLOBAL(j_read)(fhp->fd, addr, len) != (ssize_t)len) { 161 ret = __os_get_syserr(); 162 __db_syserr(env, ret, "read: %#lx, %lu", 163 P_TO_ULONG(addr), (u_long)len); 164 ret = __os_posix_err(ret); 165 } 166 return (ret); 167 } 168 169 for (taddr = addr, offset = 0; 170 offset < len; taddr += nr, offset += (u_int32_t)nr) { 171 LAST_PANIC_CHECK_BEFORE_IO(env); 172 RETRY_CHK(((nr = read(fhp->fd, 173 CHAR_STAR_CAST taddr, len - offset)) < 0 ? 1 : 0), ret); 174 if (nr == 0 || ret != 0) 175 break; 176 } 177 *nrp = (size_t)(taddr - (u_int8_t *)addr); 178 if (ret != 0) { 179 __db_syserr(env, ret, "read: %#lx, %lu", 180 P_TO_ULONG(taddr), (u_long)len - offset); 181 ret = __os_posix_err(ret); 182 } 183 return (ret); 184} 185 186/* 187 * __os_write -- 188 * Write to a file handle. 189 * 190 * PUBLIC: int __os_write __P((ENV *, DB_FH *, void *, size_t, size_t *)); 191 */ 192int 193__os_write(env, fhp, addr, len, nwp) 194 ENV *env; 195 DB_FH *fhp; 196 void *addr; 197 size_t len; 198 size_t *nwp; 199{ 200 DB_ASSERT(env, F_ISSET(fhp, DB_FH_OPENED) && fhp->fd != -1); 201 202#ifdef HAVE_FILESYSTEM_NOTZERO 203 /* Zero-fill as necessary. */ 204 if (__os_fs_notzero()) { 205 int ret; 206 if ((ret = __db_zero_fill(env, fhp)) != 0) 207 return (ret); 208 } 209#endif 210 return (__os_physwrite(env, fhp, addr, len, nwp)); 211} 212 213/* 214 * __os_physwrite -- 215 * Physical write to a file handle. 216 * 217 * PUBLIC: int __os_physwrite 218 * PUBLIC: __P((ENV *, DB_FH *, void *, size_t, size_t *)); 219 */ 220int 221__os_physwrite(env, fhp, addr, len, nwp) 222 ENV *env; 223 DB_FH *fhp; 224 void *addr; 225 size_t len; 226 size_t *nwp; 227{ 228 DB_ENV *dbenv; 229 size_t offset; 230 ssize_t nw; 231 int ret; 232 u_int8_t *taddr; 233 234 dbenv = env == NULL ? NULL : env->dbenv; 235 ret = 0; 236 237#if defined(HAVE_STATISTICS) 238 ++fhp->write_count; 239#endif 240 if (dbenv != NULL && FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS_ALL)) 241 __db_msg(env, 242 "fileops: write %s: %lu bytes", fhp->name, (u_long)len); 243 244#if defined(HAVE_FILESYSTEM_NOTZERO) && defined(DIAGNOSTIC) 245 if (__os_fs_notzero()) { 246 struct stat sb; 247 off_t cur_off; 248 249 DB_ASSERT(env, fstat(fhp->fd, &sb) != -1 && 250 (cur_off = lseek(fhp->fd, (off_t)0, SEEK_CUR)) != -1 && 251 cur_off <= sb.st_size); 252 } 253#endif 254 if (DB_GLOBAL(j_write) != NULL) { 255 *nwp = len; 256 LAST_PANIC_CHECK_BEFORE_IO(env); 257 if (DB_GLOBAL(j_write)(fhp->fd, addr, len) != (ssize_t)len) { 258 ret = __os_get_syserr(); 259 __db_syserr(env, ret, "write: %#lx, %lu", 260 P_TO_ULONG(addr), (u_long)len); 261 ret = __os_posix_err(ret); 262 263 DB_EVENT(env, DB_EVENT_WRITE_FAILED, NULL); 264 } 265 return (ret); 266 } 267 268 for (taddr = addr, offset = 0; 269 offset < len; taddr += nw, offset += (u_int32_t)nw) { 270 LAST_PANIC_CHECK_BEFORE_IO(env); 271 RETRY_CHK(((nw = write(fhp->fd, 272 CHAR_STAR_CAST taddr, len - offset)) < 0 ? 1 : 0), ret); 273 if (ret != 0) 274 break; 275 } 276 *nwp = len; 277 if (ret != 0) { 278 __db_syserr(env, ret, "write: %#lx, %lu", 279 P_TO_ULONG(taddr), (u_long)len - offset); 280 ret = __os_posix_err(ret); 281 282 DB_EVENT(env, DB_EVENT_WRITE_FAILED, NULL); 283 } 284 return (ret); 285} 286