1/*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 1997,2008 Oracle. All rights reserved. 5 * 6 * $Id: os_open.c,v 12.29 2008/03/26 04:11:35 david Exp $ 7 */ 8 9#include "db_config.h" 10 11#include "db_int.h" 12 13/* 14 * __os_open -- 15 * Open a file descriptor (including page size and log size information). 16 */ 17int 18__os_open(env, name, page_size, flags, mode, fhpp) 19 ENV *env; 20 const char *name; 21 u_int32_t page_size, flags; 22 int mode; 23 DB_FH **fhpp; 24{ 25 DB_ENV *dbenv; 26 DB_FH *fhp; 27#ifndef DB_WINCE 28 DWORD cluster_size, sector_size, free_clusters, total_clusters; 29 _TCHAR *drive, dbuf[4]; /* <letter><colon><slash><nul> */ 30#endif 31 int access, attr, createflag, nrepeat, ret, share; 32 _TCHAR *tname; 33 34 dbenv = env == NULL ? NULL : env->dbenv; 35 *fhpp = NULL; 36 tname = NULL; 37 38 if (dbenv != NULL && 39 FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS | DB_VERB_FILEOPS_ALL)) 40 __db_msg(env, "fileops: open %s", name); 41 42#define OKFLAGS \ 43 (DB_OSO_ABSMODE | DB_OSO_CREATE | DB_OSO_DIRECT | DB_OSO_DSYNC |\ 44 DB_OSO_EXCL | DB_OSO_RDONLY | DB_OSO_REGION | DB_OSO_SEQ | \ 45 DB_OSO_TEMP | DB_OSO_TRUNC) 46 if ((ret = __db_fchk(env, "__os_open", flags, OKFLAGS)) != 0) 47 return (ret); 48 49 TO_TSTRING(env, name, tname, ret); 50 if (ret != 0) 51 goto err; 52 53 /* 54 * Allocate the file handle and copy the file name. We generally only 55 * use the name for verbose or error messages, but on systems where we 56 * can't unlink temporary files immediately, we use the name to unlink 57 * the temporary file when the file handle is closed. 58 * 59 * Lock the ENV handle and insert the new file handle on the list. 60 */ 61 if ((ret = __os_calloc(env, 1, sizeof(DB_FH), &fhp)) != 0) 62 return (ret); 63 if ((ret = __os_strdup(env, name, &fhp->name)) != 0) 64 goto err; 65 if (env != NULL) { 66 MUTEX_LOCK(env, env->mtx_env); 67 TAILQ_INSERT_TAIL(&env->fdlist, fhp, q); 68 MUTEX_UNLOCK(env, env->mtx_env); 69 F_SET(fhp, DB_FH_ENVLINK); 70 } 71 72 /* 73 * Otherwise, use the Windows/32 CreateFile interface so that we can 74 * play magic games with files to get data flush effects similar to 75 * the POSIX O_DSYNC flag. 76 * 77 * !!! 78 * We currently ignore the 'mode' argument. It would be possible 79 * to construct a set of security attributes that we could pass to 80 * CreateFile that would accurately represents the mode. In worst 81 * case, this would require looking up user and all group names and 82 * creating an entry for each. Alternatively, we could call the 83 * _chmod (partial emulation) function after file creation, although 84 * this leaves us with an obvious race. However, these efforts are 85 * largely meaningless on FAT, the most common file system, which 86 * only has a "readable" and "writeable" flag, applying to all users. 87 */ 88 access = GENERIC_READ; 89 if (!LF_ISSET(DB_OSO_RDONLY)) 90 access |= GENERIC_WRITE; 91 92#ifdef DB_WINCE 93 /* 94 * WinCE translates these flags into share flags for 95 * CreateFileForMapping. 96 * Also WinCE does not support the FILE_SHARE_DELETE flag. 97 */ 98 if (LF_ISSET(DB_OSO_REGION)) 99 share = GENERIC_READ | GENERIC_WRITE; 100 else 101 share = FILE_SHARE_READ | FILE_SHARE_WRITE; 102#else 103 share = FILE_SHARE_READ | FILE_SHARE_WRITE; 104 if (__os_is_winnt()) 105 share |= FILE_SHARE_DELETE; 106#endif 107 attr = FILE_ATTRIBUTE_NORMAL; 108 109 /* 110 * Reproduce POSIX 1003.1 semantics: if O_CREATE and O_EXCL are both 111 * specified, fail, returning EEXIST, unless we create the file. 112 */ 113 if (LF_ISSET(DB_OSO_CREATE) && LF_ISSET(DB_OSO_EXCL)) 114 createflag = CREATE_NEW; /* create only if !exist*/ 115 else if (!LF_ISSET(DB_OSO_CREATE) && LF_ISSET(DB_OSO_TRUNC)) 116 createflag = TRUNCATE_EXISTING; /* truncate, fail if !exist */ 117 else if (LF_ISSET(DB_OSO_TRUNC)) 118 createflag = CREATE_ALWAYS; /* create and truncate */ 119 else if (LF_ISSET(DB_OSO_CREATE)) 120 createflag = OPEN_ALWAYS; /* open or create */ 121 else 122 createflag = OPEN_EXISTING; /* open only if existing */ 123 124 if (LF_ISSET(DB_OSO_DSYNC)) { 125 F_SET(fhp, DB_FH_NOSYNC); 126 attr |= FILE_FLAG_WRITE_THROUGH; 127 } 128 129#ifndef DB_WINCE 130 if (LF_ISSET(DB_OSO_SEQ)) 131 attr |= FILE_FLAG_SEQUENTIAL_SCAN; 132 else 133 attr |= FILE_FLAG_RANDOM_ACCESS; 134#endif 135 136 if (LF_ISSET(DB_OSO_TEMP)) 137 attr |= FILE_FLAG_DELETE_ON_CLOSE; 138 139 /* 140 * We can turn filesystem buffering off if the page size is a 141 * multiple of the disk's sector size. To find the sector size, 142 * we call GetDiskFreeSpace, which expects a drive name like "d:\\" 143 * or NULL for the current disk (i.e., a relative path). 144 * 145 * WinCE only has GetDiskFreeSpaceEx which does not 146 * return the sector size. 147 */ 148#ifndef DB_WINCE 149 if (LF_ISSET(DB_OSO_DIRECT) && page_size != 0 && name[0] != '\0') { 150 if (name[1] == ':') { 151 drive = dbuf; 152 _sntprintf(dbuf, sizeof(dbuf), _T("%c:\\"), tname[0]); 153 } else 154 drive = NULL; 155 156 /* 157 * We ignore all results except sectorsize, but some versions 158 * of Windows require that the parameters are non-NULL. 159 */ 160 if (GetDiskFreeSpace(drive, &cluster_size, 161 §or_size, &free_clusters, &total_clusters) && 162 page_size % sector_size == 0) 163 attr |= FILE_FLAG_NO_BUFFERING; 164 } 165#endif 166 167 fhp->handle = fhp->trunc_handle = INVALID_HANDLE_VALUE; 168 for (nrepeat = 1;; ++nrepeat) { 169 if (fhp->handle == INVALID_HANDLE_VALUE) { 170#ifdef DB_WINCE 171 if (LF_ISSET(DB_OSO_REGION)) 172 fhp->handle = CreateFileForMapping(tname, 173 access, share, NULL, createflag, attr, 0); 174 else 175#endif 176 fhp->handle = CreateFile(tname, 177 access, share, NULL, createflag, attr, 0); 178 } 179 180 /* 181 * Since WinCE does not support truncate, we don't 182 * need to open this second handle. 183 * This code will not work unaltered on WinCE, the 184 * creation of the second handle fails. 185 */ 186#ifndef DB_WINCE 187 /* 188 * Windows does not provide truncate directly. There is no 189 * safe way to use a handle for truncate concurrently with 190 * reads or writes. To deal with this, we open a second handle 191 * used just for truncating. 192 */ 193 if (fhp->handle != INVALID_HANDLE_VALUE && 194 !LF_ISSET(DB_OSO_RDONLY | DB_OSO_TEMP) && 195 fhp->trunc_handle == INVALID_HANDLE_VALUE) 196 fhp->trunc_handle = CreateFile( 197 tname, access, share, NULL, OPEN_EXISTING, attr, 0); 198 199 if (fhp->handle == INVALID_HANDLE_VALUE || 200 (!LF_ISSET(DB_OSO_RDONLY | DB_OSO_TEMP) && 201 fhp->trunc_handle == INVALID_HANDLE_VALUE)) 202#else 203 if (fhp->handle == INVALID_HANDLE_VALUE) 204#endif 205 { 206 /* 207 * If it's a "temporary" error, we retry up to 3 times, 208 * waiting up to 12 seconds. While it's not a problem 209 * if we can't open a database, an inability to open a 210 * log file is cause for serious dismay. 211 */ 212 ret = __os_posix_err(__os_get_syserr()); 213 if ((ret != ENFILE && ret != EMFILE && ret != ENOSPC) || 214 nrepeat > 3) 215 goto err; 216 217 __os_yield(env, nrepeat * 2, 0); 218 } else 219 break; 220 } 221 222 FREE_STRING(env, tname); 223 224 if (LF_ISSET(DB_OSO_REGION)) 225 F_SET(fhp, DB_FH_REGION); 226 F_SET(fhp, DB_FH_OPENED); 227 *fhpp = fhp; 228 return (0); 229 230err: FREE_STRING(env, tname); 231 if (fhp != NULL) 232 (void)__os_closehandle(env, fhp); 233 return (ret); 234} 235