1/*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1997,2008 Oracle.  All rights reserved.
5 *
6 * $Id: zerofill.c,v 12.29 2008/03/12 19:13:07 mbrey Exp $
7 */
8
9#include "db_config.h"
10
11#include "db_int.h"
12
13/*
14 * __db_zero_fill --
15 *	Zero out bytes in the file.
16 *
17 *	Pages allocated by writing pages past end-of-file are not zeroed,
18 *	on some systems.  Recovery could theoretically be fooled by a page
19 *	showing up that contained garbage.  In order to avoid this, we
20 *	have to write the pages out to disk, and flush them.  The reason
21 *	for the flush is because if we don't sync, the allocation of another
22 *	page subsequent to this one might reach the disk first, and if we
23 *	crashed at the right moment, leave us with this page as the one
24 *	allocated by writing a page past it in the file.
25 *
26 * PUBLIC: int __db_zero_fill __P((ENV *, DB_FH *));
27 */
28int
29__db_zero_fill(env, fhp)
30	ENV *env;
31	DB_FH *fhp;
32{
33#ifdef HAVE_FILESYSTEM_NOTZERO
34	off_t stat_offset, write_offset;
35	size_t blen, nw;
36	u_int32_t bytes, mbytes;
37	int group_sync, ret;
38	u_int8_t *bp;
39
40	/* Calculate the byte offset of the next write. */
41	write_offset = (off_t)fhp->pgno * fhp->pgsize + fhp->offset;
42
43	/* Stat the file. */
44	if ((ret = __os_ioinfo(env, NULL, fhp, &mbytes, &bytes, NULL)) != 0)
45		return (ret);
46	stat_offset = (off_t)mbytes * MEGABYTE + bytes;
47
48	/* Check if the file is large enough. */
49	if (stat_offset >= write_offset)
50		return (0);
51
52	/* Get a large buffer if we're writing lots of data. */
53#undef	ZF_LARGE_WRITE
54#define	ZF_LARGE_WRITE	(64 * 1024)
55	if ((ret = __os_calloc(env, 1, ZF_LARGE_WRITE, &bp)) != 0)
56		return (ret);
57	blen = ZF_LARGE_WRITE;
58
59	/* Seek to the current end of the file. */
60	if ((ret = __os_seek(env, fhp, mbytes, MEGABYTE, bytes)) != 0)
61		goto err;
62
63	/*
64	 * Hash is the only access method that allocates groups of pages.  Hash
65	 * uses the existence of the last page in a group to signify the entire
66	 * group is OK; so, write all the pages but the last one in the group,
67	 * flush them to disk, then write the last one to disk and flush it.
68	 */
69	for (group_sync = 0; stat_offset < write_offset; group_sync = 1) {
70		if (write_offset - stat_offset <= (off_t)blen) {
71			blen = (size_t)(write_offset - stat_offset);
72			if (group_sync && (ret = __os_fsync(env, fhp)) != 0)
73				goto err;
74		}
75		if ((ret = __os_physwrite(env, fhp, bp, blen, &nw)) != 0)
76			goto err;
77		stat_offset += blen;
78	}
79	if ((ret = __os_fsync(env, fhp)) != 0)
80		goto err;
81
82	/* Seek back to where we started. */
83	mbytes = (u_int32_t)(write_offset / MEGABYTE);
84	bytes = (u_int32_t)(write_offset % MEGABYTE);
85	ret = __os_seek(env, fhp, mbytes, MEGABYTE, bytes);
86
87err:	 __os_free(env, bp);
88	return (ret);
89#else
90	COMPQUIET(env, NULL);
91	COMPQUIET(fhp, NULL);
92	return (0);
93#endif /* HAVE_FILESYSTEM_NOTZERO */
94}
95
96/*
97 * __db_zero --
98 *	Zero to the end of the file.
99 *
100 * PUBLIC: int __db_zero_extend __P((ENV *,
101 * PUBLIC:     DB_FH *, db_pgno_t, db_pgno_t, u_int32_t));
102 */
103int
104__db_zero_extend(env, fhp, pgno, last_pgno, pgsize)
105	ENV *env;
106	DB_FH *fhp;
107	db_pgno_t pgno, last_pgno;
108	u_int32_t pgsize;
109{
110	int ret;
111	size_t nwrote;
112	u_int8_t *buf;
113
114	if ((ret = __os_calloc(env, 1, pgsize, &buf)) != 0)
115		return (ret);
116	memset(buf, 0, pgsize);
117	for (; pgno <= last_pgno; pgno++)
118		if ((ret = __os_io(env, DB_IO_WRITE,
119		    fhp, pgno, pgsize, 0, pgsize, buf, &nwrote)) != 0) {
120			if (ret == 0) {
121				ret = EIO;
122				goto err;
123			}
124			goto err;
125		}
126
127err:	__os_free(env, buf);
128	return (ret);
129}
130