1/*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 1996,2008 Oracle. All rights reserved. 5 * 6 * $Id: os_map.c,v 12.25 2008/05/07 12:27:35 bschmeck Exp $ 7 */ 8 9#include "db_config.h" 10 11#include "db_int.h" 12 13static int __os_map 14 __P((ENV *, char *, REGINFO *, DB_FH *, size_t, int, int, int, void **)); 15static int __os_unique_name __P((_TCHAR *, HANDLE, _TCHAR *, size_t)); 16 17/* 18 * __os_attach -- 19 * Create/join a shared memory region. 20 */ 21int 22__os_attach(env, infop, rp) 23 ENV *env; 24 REGINFO *infop; 25 REGION *rp; 26{ 27 DB_FH *fhp; 28 int ret; 29 30 /* 31 * On Windows/9X, files that are opened by multiple processes do not 32 * share data correctly. For this reason, we require that DB_PRIVATE 33 * be specified on that platform. 34 */ 35 if (!F_ISSET(env, ENV_PRIVATE) && __os_is_winnt() == 0) { 36 __db_err(env, 37 EINVAL, "Windows 9X systems must specify DB_PRIVATE"); 38 return (EINVAL); 39 } 40 41 /* 42 * Try to open/create the file. We DO NOT need to ensure that multiple 43 * threads/processes attempting to simultaneously create the region are 44 * properly ordered, our caller has already taken care of that. 45 */ 46 if ((ret = __os_open(env, infop->name, 0, DB_OSO_REGION | 47 (F_ISSET(infop, REGION_CREATE_OK) ? DB_OSO_CREATE : 0), 48 env->db_mode, &fhp)) != 0) { 49 __db_err(env, ret, "%s", infop->name); 50 return (ret); 51 } 52 53 /* 54 * Map the file in. If we're creating an in-system-memory region, 55 * specify a segment ID (which is never used again) so that the 56 * calling code writes out the REGENV_REF structure to the primary 57 * environment file. 58 */ 59 ret = __os_map(env, infop->name, infop, fhp, rp->size, 60 1, F_ISSET(env, ENV_SYSTEM_MEM), 0, &infop->addr); 61 if (ret == 0 && F_ISSET(env, ENV_SYSTEM_MEM)) 62 rp->segid = 1; 63 64 (void)__os_closehandle(env, fhp); 65 66 return (ret); 67} 68 69/* 70 * __os_detach -- 71 * Detach from a shared memory region. 72 */ 73int 74__os_detach(env, infop, destroy) 75 ENV *env; 76 REGINFO *infop; 77 int destroy; 78{ 79 DB_ENV *dbenv; 80 int ret, t_ret; 81 82 dbenv = env->dbenv; 83 84 if (infop->wnt_handle != NULL) { 85 (void)CloseHandle(infop->wnt_handle); 86 infop->wnt_handle = NULL; 87 } 88 89 ret = !UnmapViewOfFile(infop->addr) ? __os_get_syserr() : 0; 90 if (ret != 0) { 91 __db_syserr(env, ret, "UnmapViewOfFile"); 92 ret = __os_posix_err(ret); 93 } 94 95 if (!F_ISSET(env, ENV_SYSTEM_MEM) && destroy && 96 (t_ret = __os_unlink(env, infop->name, 1)) != 0 && ret == 0) 97 ret = t_ret; 98 99 return (ret); 100} 101 102/* 103 * __os_mapfile -- 104 * Map in a shared memory file. 105 */ 106int 107__os_mapfile(env, path, fhp, len, is_rdonly, addr) 108 ENV *env; 109 char *path; 110 DB_FH *fhp; 111 int is_rdonly; 112 size_t len; 113 void **addr; 114{ 115#ifdef DB_WINCE 116 /* 117 * Windows CE has special requirements for file mapping to work. 118 * * The input handle needs to be opened using CreateFileForMapping 119 * * Concurrent access via a non mapped file is not supported. 120 * So we disable support for memory mapping files on Windows CE. It is 121 * currently only used as an optimization in mpool for small read only 122 * databases. 123 */ 124 return (EFAULT); 125#else 126 DB_ENV *dbenv; 127 128 dbenv = env == NULL ? NULL : env->dbenv; 129 130 if (dbenv != NULL && 131 FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS | DB_VERB_FILEOPS_ALL)) 132 __db_msg(env, "fileops: mmap %s", path); 133 return (__os_map(env, path, NULL, fhp, len, 0, 0, is_rdonly, addr)); 134#endif 135} 136 137/* 138 * __os_unmapfile -- 139 * Unmap the shared memory file. 140 */ 141int 142__os_unmapfile(env, addr, len) 143 ENV *env; 144 void *addr; 145 size_t len; 146{ 147 DB_ENV *dbenv; 148 149 dbenv = env == NULL ? NULL : env->dbenv; 150 151 if (dbenv != NULL && 152 FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS | DB_VERB_FILEOPS_ALL)) 153 __db_msg(env, "fileops: munmap"); 154 155 return (!UnmapViewOfFile(addr) ? __os_posix_err(__os_get_syserr()) : 0); 156} 157 158/* 159 * __os_unique_name -- 160 * Create a unique identifying name from a pathname (may be absolute or 161 * relative) and/or a file descriptor. 162 * 163 * The name returned must be unique (different files map to different 164 * names), and repeatable (same files, map to same names). It's not 165 * so easy to do by name. Should handle not only: 166 * 167 * foo.bar == ./foo.bar == c:/whatever_path/foo.bar 168 * 169 * but also understand that: 170 * 171 * foo.bar == Foo.Bar (FAT file system) 172 * foo.bar != Foo.Bar (NTFS) 173 * 174 * The best solution is to use the file index, found in the file 175 * information structure (similar to UNIX inode #). 176 * 177 * When a file is deleted, its file index may be reused, 178 * but if the unique name has not gone from its namespace, 179 * we may get a conflict. So to ensure some tie in to the 180 * original pathname, we also use the creation time and the 181 * file basename. This is not a perfect system, but it 182 * should work for all but anamolous test cases. 183 * 184 */ 185static int 186__os_unique_name(orig_path, hfile, result_path, result_path_len) 187 _TCHAR *orig_path, *result_path; 188 HANDLE hfile; 189 size_t result_path_len; 190{ 191 BY_HANDLE_FILE_INFORMATION fileinfo; 192 _TCHAR *basename, *p; 193 194 /* 195 * In Windows, pathname components are delimited by '/' or '\', and 196 * if neither is present, we need to strip off leading drive letter 197 * (e.g. c:foo.txt). 198 */ 199 basename = _tcsrchr(orig_path, '/'); 200 p = _tcsrchr(orig_path, '\\'); 201 if (basename == NULL || (p != NULL && p > basename)) 202 basename = p; 203 if (basename == NULL) 204 basename = _tcsrchr(orig_path, ':'); 205 206 if (basename == NULL) 207 basename = orig_path; 208 else 209 basename++; 210 211 if (!GetFileInformationByHandle(hfile, &fileinfo)) 212 return (__os_posix_err(__os_get_syserr())); 213 214 (void)_sntprintf(result_path, result_path_len, 215 _T("__db_shmem.%8.8lx.%8.8lx.%8.8lx.%8.8lx.%8.8lx.%s"), 216 fileinfo.dwVolumeSerialNumber, 217 fileinfo.nFileIndexHigh, 218 fileinfo.nFileIndexLow, 219 fileinfo.ftCreationTime.dwHighDateTime, 220 fileinfo.ftCreationTime.dwHighDateTime, 221 basename); 222 223 return (0); 224} 225 226/* 227 * __os_map -- 228 * The mmap(2) function for Windows. 229 */ 230static int 231__os_map(env, path, infop, fhp, len, is_region, is_system, is_rdonly, addr) 232 ENV *env; 233 REGINFO *infop; 234 char *path; 235 DB_FH *fhp; 236 int is_region, is_system, is_rdonly; 237 size_t len; 238 void **addr; 239{ 240 HANDLE hMemory; 241 int ret, use_pagefile; 242 _TCHAR *tpath, shmem_name[DB_MAXPATHLEN]; 243 void *pMemory; 244 unsigned __int64 len64; 245 246 ret = 0; 247 if (infop != NULL) 248 infop->wnt_handle = NULL; 249 250 /* 251 * On 64 bit systems, len is already a 64 bit value. 252 * On 32 bit systems len is a 32 bit value. 253 * Always convert to a 64 bit value, so that the high order 254 * DWORD can be simply extracted on 64 bit platforms. 255 */ 256 len64 = len; 257 258 use_pagefile = is_region && is_system; 259 260 /* 261 * If creating a region in system space, get a matching name in the 262 * paging file namespace. 263 */ 264 if (use_pagefile) { 265#ifdef DB_WINCE 266 __db_errx(env, "Unable to memory map regions using system " 267 "memory on WinCE."); 268 return (EFAULT); 269#endif 270 TO_TSTRING(env, path, tpath, ret); 271 if (ret != 0) 272 return (ret); 273 ret = __os_unique_name(tpath, fhp->handle, 274 shmem_name, sizeof(shmem_name)); 275 FREE_STRING(env, tpath); 276 if (ret != 0) 277 return (ret); 278 } 279 280 /* 281 * XXX 282 * DB: We have not implemented copy-on-write here. 283 * 284 * If this is an region in system memory, we try to open it using the 285 * OpenFileMapping() first, and only call CreateFileMapping() if we're 286 * really creating the section. There are two reasons: 287 * 288 * 1) We only create the mapping if we have newly created the region. 289 * This avoids a long-running problem caused by Windows reference 290 * counting, where regions that are closed by all processes are 291 * deleted. It turns out that just checking for a zeroed region 292 * is not good enough. See [#4882] and [#7127] for the details. 293 * 294 * 2) CreateFileMapping seems to mess up making the commit charge to 295 * the process. It thinks, incorrectly, that when we want to join a 296 * previously existing section, that it should make a commit charge 297 * for the whole section. In fact, there is no new committed memory 298 * whatever. The call can fail if there is insufficient memory free 299 * to handle the erroneous commit charge. So, we find that the 300 * bogus commit is not made if we call OpenFileMapping. 301 */ 302 hMemory = NULL; 303 if (use_pagefile) { 304#ifndef DB_WINCE 305 hMemory = OpenFileMapping( 306 is_rdonly ? FILE_MAP_READ : FILE_MAP_ALL_ACCESS, 307 0, shmem_name); 308 309 if (hMemory == NULL && F_ISSET(infop, REGION_CREATE_OK)) 310 hMemory = CreateFileMapping((HANDLE)-1, 0, 311 is_rdonly ? PAGE_READONLY : PAGE_READWRITE, 312 (DWORD)(len64 >> 32), (DWORD)len64, shmem_name); 313#endif 314 } else { 315 hMemory = CreateFileMapping(fhp->handle, 0, 316 is_rdonly ? PAGE_READONLY : PAGE_READWRITE, 317 (DWORD)(len64 >> 32), (DWORD)len64, NULL); 318#ifdef DB_WINCE 319 /* 320 * WinCE automatically closes the handle passed in. 321 * Ensure DB does not attempt to close the handle again. 322 */ 323 fhp->handle = INVALID_HANDLE_VALUE; 324 F_CLR(fhp, DB_FH_OPENED); 325#endif 326 } 327 328 if (hMemory == NULL) { 329 ret = __os_get_syserr(); 330 __db_syserr(env, ret, "OpenFileMapping"); 331 return (__env_panic(env, __os_posix_err(ret))); 332 } 333 334 pMemory = MapViewOfFile(hMemory, 335 (is_rdonly ? FILE_MAP_READ : FILE_MAP_ALL_ACCESS), 0, 0, len); 336 if (pMemory == NULL) { 337 ret = __os_get_syserr(); 338 __db_syserr(env, ret, "MapViewOfFile"); 339 return (__env_panic(env, __os_posix_err(ret))); 340 } 341 342 /* 343 * XXX 344 * It turns out that the kernel object underlying the named section 345 * is reference counted, but that the call to MapViewOfFile() above 346 * does NOT increment the reference count! So, if we close the handle 347 * here, the kernel deletes the object from the kernel namespace. 348 * When a second process comes along to join the region, the kernel 349 * happily creates a new object with the same name, but completely 350 * different identity. The two processes then have distinct isolated 351 * mapped sections, not at all what was wanted. Not closing the handle 352 * here fixes this problem. We carry the handle around in the region 353 * structure so we can close it when unmap is called. 354 */ 355 if (use_pagefile && infop != NULL) 356 infop->wnt_handle = hMemory; 357 else 358 CloseHandle(hMemory); 359 360 *addr = pMemory; 361 return (ret); 362} 363