rsyncfile.c revision 186743
/*-
 * Copyright (c) 2008-2009, Ulf Lilleengen <lulf@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: projects/csup_cvsmode/contrib/csup/rsyncfile.c 186743 2009-01-04 17:31:01Z lulf $
 */
28226584Sdim
29235633Sdim#include <errno.h>
30235633Sdim#include <string.h>
31235633Sdim#include <stdlib.h>
32235633Sdim#include <stdio.h>
33235633Sdim
34235633Sdim#include <sys/types.h>
35235633Sdim#include <sys/stat.h>
36235633Sdim#include <sys/mman.h>
37235633Sdim#include <fcntl.h>
38245431Sdim#include <unistd.h>
39235633Sdim
40252723Sdim#include "misc.h"
41245431Sdim#include "rsyncfile.h"
42245431Sdim
/* Lower and upper bound of the block sizes rsync_chooseblocksize() tries. */
#define	MINBLOCKSIZE		1024
#define	MAXBLOCKSIZE		(16 * 1024)
/*
 * Only used to derive MAXBLOCKS below.
 * NOTE(review): presumably the peer's receive buffer size and the number of
 * bytes of checksum information sent per block -- confirm against protocol.
 */
#define	RECEIVEBUFFERSIZE	(15 * 1024)
#define	BLOCKINFOSIZE		26
/* Candidate block sizes are searched this far around the initial estimate. */
#define	SEARCHREGION		10
/* File size is divided by this to get the initial block size estimate. */
#define	MAXBLOCKS		(RECEIVEBUFFERSIZE / BLOCKINFOSIZE)

/* Constant added to every byte while computing the rolling checksum. */
#define	CHAR_OFFSET		3
/* Buffer size for the checksum string: 8 hex digits plus terminating NUL. */
#define	RSUM_SIZE		9
52235633Sdim
53235633Sdimstruct rsyncfile {
54235633Sdim	char *start;
55235633Sdim	char *buf;
56235633Sdim	char *end;
57235633Sdim	size_t blocksize;
58235633Sdim	size_t fsize;
59235633Sdim	int fd;
60235633Sdim
61235633Sdim	char *blockptr;
62235633Sdim	int blocknum;
63235633Sdim	char blockmd5[MD5_DIGEST_SIZE];
64235633Sdim	char rsumstr[RSUM_SIZE];
65235633Sdim	uint32_t rsum;
66235633Sdim};
67235633Sdim
/* File-local helpers, defined further down. */
static size_t	rsync_chooseblocksize(size_t);
static uint32_t	rsync_rollsum(char *, size_t);
70245431Sdim
71245431Sdim/* Open a file and initialize variable for rsync operation. */
72263509Sdimstruct rsyncfile *
73245431Sdimrsync_open(char *path, size_t blocksize, int rdonly)
74235633Sdim{
75235633Sdim	struct rsyncfile *rf;
76245431Sdim	struct stat st;
77245431Sdim	int error;
78245431Sdim
79245431Sdim	rf = xmalloc(sizeof(*rf));
80245431Sdim	error = stat(path, &st);
81245431Sdim	if (error) {
82245431Sdim		free(rf);
83245431Sdim		return (NULL);
84263509Sdim	}
85263509Sdim	rf->fsize = st.st_size;
86263509Sdim
87263509Sdim	rf->fd = open(path, rdonly ? O_RDONLY : O_RDWR);
88263509Sdim	if (rf->fd < 0) {
89263509Sdim		free(rf);
90263509Sdim		return (NULL);
91235633Sdim	}
92235633Sdim	rf->buf = mmap(0, rf->fsize, PROT_READ, MAP_SHARED, rf->fd, 0);
93235633Sdim	if (rf->buf == MAP_FAILED) {
94235633Sdim		free(rf);
95235633Sdim		return (NULL);
96226584Sdim	}
97235633Sdim	rf->start = rf->buf;
98235633Sdim	rf->end = rf->buf + rf->fsize;
99235633Sdim	rf->blocksize = (blocksize == 0 ? rsync_chooseblocksize(rf->fsize) :
100235633Sdim	    blocksize);
101226584Sdim	rf->blockptr = rf->buf;
102226584Sdim	rf->blocknum = 0;
103235633Sdim	return (rf);
104235633Sdim}
105235633Sdim
106226584Sdim/* Close and free all resources related to an rsync file transfer. */
107235633Sdimint
108245431Sdimrsync_close(struct rsyncfile *rf)
109245431Sdim{
110226584Sdim	int error;
111226584Sdim
112245431Sdim	error = munmap(rf->buf, rf->fsize);
113226584Sdim	if (error)
114226584Sdim		return (error);
115235633Sdim	close(rf->fd);
116226584Sdim	free(rf);
117235633Sdim	return (0);
118235633Sdim}
119235633Sdim
120235633Sdim/*
121235633Sdim * Choose the most appropriate block size for an rsync transfer. Modeled
122235633Sdim * algorithm after cvsup.
123235633Sdim */
124235633Sdimstatic size_t
125235633Sdimrsync_chooseblocksize(size_t fsize)
126235633Sdim{
127235633Sdim	size_t bestrem, blocksize, bs, hisearch, losearch, rem;
128235633Sdim
129235633Sdim	blocksize = fsize / MAXBLOCKS;
130235633Sdim	losearch = blocksize - SEARCHREGION;
131235633Sdim	hisearch = blocksize + SEARCHREGION;
132235633Sdim
133235633Sdim	if (losearch < MINBLOCKSIZE) {
134235633Sdim		losearch = MINBLOCKSIZE;
135235633Sdim		hisearch = losearch + (2 * SEARCHREGION);
136235633Sdim	} else if (hisearch > MAXBLOCKSIZE) {
137235633Sdim		hisearch = MAXBLOCKSIZE;
138235633Sdim		losearch = hisearch - (2 * SEARCHREGION);
139235633Sdim	}
140235633Sdim
141235633Sdim	bestrem = MAXBLOCKSIZE;
142235633Sdim	for (bs = losearch; bs <= hisearch; bs++) {
143235633Sdim		rem = fsize % bs;
144235633Sdim		if (rem < bestrem) {
145235633Sdim			bestrem = rem;
146235633Sdim			blocksize = bs;
147235633Sdim		}
148235633Sdim	}
149235633Sdim	return (bestrem);
150245431Sdim}
151245431Sdim
152235633Sdim/* Get the next rsync block of a file. */
153235633Sdimint
154235633Sdimrsync_nextblock(struct rsyncfile *rf)
155235633Sdim{
156235633Sdim	MD5_CTX ctx;
157235633Sdim	size_t blocksize;
158235633Sdim
159226584Sdim	if (rf->blockptr >= rf->end)
160226584Sdim		return (0);
161235633Sdim	blocksize = min((size_t)(rf->end - rf->blockptr), rf->blocksize);
162235633Sdim	/* Calculate MD5 of the block. */
163235633Sdim	MD5_Init(&ctx);
164235633Sdim	MD5_Update(&ctx, rf->blockptr, blocksize);
165235633Sdim	MD5_End(rf->blockmd5, &ctx);
166235633Sdim
167235633Sdim	rf->rsum = rsync_rollsum(rf->blockptr, blocksize);
168235633Sdim	snprintf(rf->rsumstr, RSUM_SIZE, "%x", rf->rsum);
169235633Sdim	rf->blocknum++;
170235633Sdim	rf->blockptr += blocksize;
171235633Sdim	return (1);
172235633Sdim}
173235633Sdim
174235633Sdim/* Get the rolling checksum of a file. */
175235633Sdimstatic uint32_t
176235633Sdimrsync_rollsum(char *buf, size_t len)
177235633Sdim{
178235633Sdim	uint32_t a, b;
179235633Sdim	char *ptr, *limit;
180235633Sdim
181235633Sdim	a = b = 0;
182235633Sdim	ptr = buf;
183235633Sdim	limit = buf + len;
184235633Sdim
185235633Sdim	while (ptr < limit) {
186235633Sdim		a += *ptr + CHAR_OFFSET;
187235633Sdim		b += a;
188235633Sdim		ptr++;
189235633Sdim	}
190235633Sdim	return ((b << 16) | a);
191235633Sdim}
192245431Sdim
193245431Sdim/* Get running sum so far. */
194245431Sdimchar *
195245431Sdimrsync_rsum(struct rsyncfile *rf)
196245431Sdim{
197245431Sdim
198245431Sdim	return (rf->rsumstr);
199245431Sdim}
200245431Sdim
201245431Sdim/* Get MD5 of current block. */
202245431Sdimchar *
203263509Sdimrsync_blockmd5(struct rsyncfile *rf)
204263509Sdim{
205263509Sdim
206263509Sdim	return (rf->blockmd5);
207263509Sdim}
208263509Sdim
209263509Sdim/* Accessor for blocksize. */
210263509Sdimsize_t
211263509Sdimrsync_blocksize(struct rsyncfile *rf)
212263509Sdim{
213263509Sdim
214263509Sdim	return (rf->blocksize);
215263509Sdim}
216263509Sdim
217235633Sdim/* Accessor for filesize. */
218235633Sdimsize_t
219235633Sdimrsync_filesize(struct rsyncfile *rf)
220235633Sdim{
221235633Sdim
222235633Sdim	return (rf->fsize);
223235633Sdim}
224235633Sdim