rsyncfile.c revision 184257
1184257Slulf/*-
2184257Slulf * Copyright (c) 2008, Ulf Lilleengen <lulf@FreeBSD.org>
3184257Slulf * All rights reserved.
4184257Slulf *
5184257Slulf * Redistribution and use in source and binary forms, with or without
6184257Slulf * modification, are permitted provided that the following conditions
7184257Slulf * are met:
8184257Slulf * 1. Redistributions of source code must retain the above copyright
9184257Slulf *    notice, this list of conditions and the following disclaimer.
10184257Slulf * 2. Redistributions in binary form must reproduce the above copyright
11184257Slulf *    notice, this list of conditions and the following disclaimer in the
12184257Slulf *    documentation and/or other materials provided with the distribution.
13184257Slulf *
14184257Slulf * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15184257Slulf * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16184257Slulf * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17184257Slulf * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18184257Slulf * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19184257Slulf * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20184257Slulf * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21184257Slulf * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22184257Slulf * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23184257Slulf * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24184257Slulf * SUCH DAMAGE.
25184257Slulf *
26184257Slulf * $FreeBSD: projects/csup_cvsmode/contrib/csup/rsyncfile.c 184257 2008-10-25 10:54:28Z lulf $
27184257Slulf */
28184257Slulf
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>

#include <errno.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "misc.h"
#include "fattr.h"
#include "rsyncfile.h"
42184257Slulf
/* Lower and upper bounds of the block size search in rsync_chooseblocksize(). */
#define MINBLOCKSIZE 1024
#define MAXBLOCKSIZE (16 * 1024)
/*
 * RECEIVEBUFFERSIZE and BLOCKINFOSIZE are used in this file only to derive
 * MAXBLOCKS.
 * NOTE(review): presumably the peer's receive buffer size and the number of
 * bytes sent per block description -- confirm against the protocol code.
 */
#define RECEIVEBUFFERSIZE (15 * 1024)
#define BLOCKINFOSIZE 26
/* Half-width of the window of candidate block sizes that gets searched. */
#define SEARCHREGION 10
/* The file size is divided by this to get the initial block size estimate. */
#define MAXBLOCKS (RECEIVEBUFFERSIZE / BLOCKINFOSIZE)

/* Constant added to every byte while accumulating the rolling checksum. */
#define CHAR_OFFSET 3
/* Size of the rsumstr buffer: up to 8 hex digits of a uint32_t plus NUL. */
#define RSUM_SIZE 9
52184257Slulf
/*
 * State for walking a memory-mapped file block by block.  Allocated and
 * filled in by rsync_open(), released by rsync_close().
 */
struct rsyncfile {
	char *start;		/* Set to the same address as buf on open. */
	char *buf;		/* Base of the mmap()ed file contents. */
	char *end;		/* One past the last mapped byte (buf + fsize). */
	size_t blocksize;	/* Nominal size of each block. */
	size_t fsize;		/* File size taken from stat(), also the map length. */
	struct fattr *fa;	/* Attributes built by fattr_fromstat(). */
	int fd;			/* Descriptor the mapping was created from. */

	char *blockptr;		/* Start of the next block to be processed. */
	int blocknum;		/* Number of blocks processed so far. */
	char blockmd5[MD5_DIGEST_SIZE];	/* MD5_End() output for the last block. */
	char rsumstr[RSUM_SIZE];	/* rsum formatted with "%x". */
	uint32_t rsum;		/* Rolling checksum of the last block. */
};
68184257Slulf
/* File-local helpers; both are defined further down in this file. */
static size_t	rsync_chooseblocksize(size_t);
static uint32_t	rsync_rollsum(uint8_t *, size_t);
71184257Slulf
72184257Slulf/* Open a file and initialize variable for rsync operation. */
73184257Slulfstruct rsyncfile *
74184257Slulfrsync_open(char *path, size_t blocksize, int read)
75184257Slulf{
76184257Slulf	struct rsyncfile *rf;
77184257Slulf	struct stat st;
78184257Slulf	int error;
79184257Slulf
80184257Slulf	rf = malloc(sizeof(*rf));
81184257Slulf	if (rf == NULL)
82184257Slulf		return (NULL);
83184257Slulf	error = stat(path, &st);
84184257Slulf	if (error) {
85184257Slulf		free(rf);
86184257Slulf		return (NULL);
87184257Slulf	}
88184257Slulf	rf->fsize = st.st_size;
89184257Slulf	rf->fa = fattr_fromstat(&st);
90184257Slulf
91184257Slulf	rf->fd = open(path, read ? O_RDONLY : O_RDWR);
92184257Slulf	if (rf->fd < 0) {
93184257Slulf		free(rf);
94184257Slulf		return (NULL);
95184257Slulf	}
96184257Slulf	rf->buf = mmap(0, rf->fsize, PROT_READ, MAP_SHARED, rf->fd, 0);
97184257Slulf	if (rf->buf == MAP_FAILED) {
98184257Slulf		free(rf);
99184257Slulf		return (NULL);
100184257Slulf	}
101184257Slulf	rf->start = rf->buf;
102184257Slulf	rf->end = rf->buf + rf->fsize;
103184257Slulf	rf->blocksize = (blocksize == 0 ? rsync_chooseblocksize(rf->fsize) :
104184257Slulf	    blocksize);
105184257Slulf	rf->blockptr = rf->buf;
106184257Slulf	rf->blocknum = 0;
107184257Slulf	return (rf);
108184257Slulf}
109184257Slulf
110184257Slulf/* Close and free all resources related to an rsync file transfer. */
111184257Slulfint
112184257Slulfrsync_close(struct rsyncfile *rf)
113184257Slulf{
114184257Slulf	int error;
115184257Slulf
116184257Slulf	error = munmap(rf->buf, rf->fsize);
117184257Slulf	if (error)
118184257Slulf		return (error);
119184257Slulf	close(rf->fd);
120184257Slulf	free(rf);
121184257Slulf}
122184257Slulf
123184257Slulf/*
124184257Slulf * Choose the most appropriate block size for an rsync transfer. Modeled
125184257Slulf * algorithm after cvsup.
126184257Slulf */
127184257Slulfstatic size_t
128184257Slulfrsync_chooseblocksize(size_t fsize)
129184257Slulf{
130184257Slulf	size_t bestrem, blocksize, bs, hisearch, losearch, rem;
131184257Slulf
132184257Slulf	blocksize = fsize / MAXBLOCKS;
133184257Slulf	losearch = blocksize - SEARCHREGION;
134184257Slulf	hisearch = blocksize + SEARCHREGION;
135184257Slulf
136184257Slulf	if (losearch < MINBLOCKSIZE) {
137184257Slulf		losearch = MINBLOCKSIZE;
138184257Slulf		hisearch = losearch + (2 * SEARCHREGION);
139184257Slulf	} else if (hisearch > MAXBLOCKSIZE) {
140184257Slulf		hisearch = MAXBLOCKSIZE;
141184257Slulf		losearch = hisearch - (2 * SEARCHREGION);
142184257Slulf	}
143184257Slulf
144184257Slulf	bestrem = MAXBLOCKSIZE;
145184257Slulf	for (bs = losearch; bs <= hisearch;) {
146184257Slulf		rem = fsize % bs;
147184257Slulf		if (rem < bestrem) {
148184257Slulf			bestrem = rem;
149184257Slulf			blocksize = bs;
150184257Slulf		}
151184257Slulf	}
152184257Slulf	return (bestrem);
153184257Slulf}
154184257Slulf
155184257Slulf/* Get the next rsync block of a file. */
156184257Slulfint
157184257Slulfrsync_nextblock(struct rsyncfile *rf)
158184257Slulf{
159184257Slulf	uint32_t rolling;
160184257Slulf	char *ptr;
161184257Slulf	MD5_CTX ctx;
162184257Slulf	size_t blocksize, i;
163184257Slulf
164184257Slulf	if (rf->blockptr >= rf->end)
165184257Slulf		return (0);
166184257Slulf	blocksize = min((rf->end - rf->blockptr), rf->blocksize);
167184257Slulf	/* Calculate MD5 of the block. */
168184257Slulf	MD5_Init(&ctx);
169184257Slulf	MD5_Update(&ctx, rf->blockptr, blocksize);
170184257Slulf	MD5_End(rf->blockmd5, &ctx);
171184257Slulf
172184257Slulf	rf->rsum = rsync_rollsum(rf->blockptr, blocksize);
173184257Slulf	snprintf(rf->rsumstr, RSUM_SIZE, "%x", rf->rsum);
174184257Slulf	rf->blocknum++;
175184257Slulf	rf->blockptr += blocksize;
176184257Slulf	return (1);
177184257Slulf}
178184257Slulf
179184257Slulf/* Get the rolling checksum of a file. */
180184257Slulfstatic uint32_t
181184257Slulfrsync_rollsum(uint8_t *buf, size_t len)
182184257Slulf{
183184257Slulf	uint32_t a, b;
184184257Slulf	uint8_t *ptr, *limit;
185184257Slulf
186184257Slulf	a = b = 0;
187184257Slulf	ptr = buf;
188184257Slulf	limit = buf + len;
189184257Slulf
190184257Slulf	while (ptr < limit) {
191184257Slulf		a += *ptr + CHAR_OFFSET;
192184257Slulf		b += a;
193184257Slulf		ptr++;
194184257Slulf	}
195184257Slulf	return ((b << 16) | a);
196184257Slulf}
197184257Slulf
198184257Slulf/* Get running sum so far. */
199184257Slulfchar *
200184257Slulfrsync_rsum(struct rsyncfile *rf)
201184257Slulf{
202184257Slulf
203184257Slulf	return (rf->rsumstr);
204184257Slulf}
205184257Slulf
206184257Slulf/* Get MD5 of current block. */
207184257Slulfchar *
208184257Slulfrsync_blockmd5(struct rsyncfile *rf)
209184257Slulf{
210184257Slulf
211184257Slulf	return (rf->blockmd5);
212184257Slulf}
213184257Slulf
214184257Slulf/* Accessor for blocksize. */
215184257Slulfsize_t
216184257Slulfrsync_blocksize(struct rsyncfile *rf)
217184257Slulf{
218184257Slulf
219184257Slulf	return (rf->blocksize);
220184257Slulf}
221184257Slulf
222184257Slulf/* Accessor for filesize. */
223184257Slulfsize_t
224184257Slulfrsync_filesize(struct rsyncfile *rf)
225184257Slulf{
226184257Slulf
227184257Slulf	return (rf->fsize);
228184257Slulf}
229