rsyncfile.c revision 186781
/*-
 * Copyright (c) 2008-2009, Ulf Lilleengen <lulf@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/contrib/csup/rsyncfile.c 186781 2009-01-05 15:18:16Z lulf $
 */
2850476Speter
2937535Sdes#include <errno.h>
3037535Sdes#include <string.h>
3137535Sdes#include <stdlib.h>
3237571Sdes#include <stdio.h>
3337535Sdes
3437535Sdes#include <sys/types.h>
3537535Sdes#include <sys/stat.h>
3637535Sdes#include <sys/mman.h>
3737535Sdes#include <fcntl.h>
3837535Sdes#include <unistd.h>
3937535Sdes
4037535Sdes#include "misc.h"
4137535Sdes#include "rsyncfile.h"
4237535Sdes
/* Smallest and largest block size rsync_chooseblocksize() will search. */
#define MINBLOCKSIZE		1024
#define MAXBLOCKSIZE		(16 * 1024)
/*
 * MAXBLOCKS is the number of BLOCKINFOSIZE-byte entries that fit in
 * RECEIVEBUFFERSIZE bytes; rsync_chooseblocksize() divides the file size
 * by it to get its starting block size.
 * NOTE(review): presumably these describe the peer's receive buffer and the
 * per-block record sent to it -- confirm against the protocol code.
 */
#define RECEIVEBUFFERSIZE	(15 * 1024)
#define BLOCKINFOSIZE		26
/* How far below and above the starting block size to search. */
#define SEARCHREGION		10
#define MAXBLOCKS		(RECEIVEBUFFERSIZE / BLOCKINFOSIZE)

/* Constant added to every byte by rsync_rollsum(). */
#define CHAR_OFFSET		3
/* Size of the rsumstr buffer: room for 8 hex digits plus the NUL. */
#define RSUM_SIZE		9
5237535Sdes
5337535Sdesstruct rsyncfile {
5437535Sdes	char *start;
5537535Sdes	char *buf;
5637535Sdes	char *end;
5737535Sdes	size_t blocksize;
5841862Sdes	size_t fsize;
5937535Sdes	int fd;
6037535Sdes
6137535Sdes	char *blockptr;
6237535Sdes	int blocknum;
6355557Sdes	char blockmd5[MD5_DIGEST_SIZE];
6467430Sdes	char rsumstr[RSUM_SIZE];
6560188Sdes	uint32_t rsum;
6637573Sdes};
6737535Sdes
/* File-local helpers, defined further down. */
static size_t		rsync_chooseblocksize(size_t fsize);
static uint32_t		rsync_rollsum(char *buf, size_t len);
7041869Sdes
7137571Sdes/* Open a file and initialize variable for rsync operation. */
7237535Sdesstruct rsyncfile *
7337535Sdesrsync_open(char *path, size_t blocksize, int rdonly)
7440939Sdes{
7541862Sdes	struct rsyncfile *rf;
7637535Sdes	struct stat st;
7737535Sdes	int error;
7837535Sdes
7937535Sdes	rf = xmalloc(sizeof(*rf));
8064883Sdes	error = stat(path, &st);
8137573Sdes	if (error) {
8237573Sdes		free(rf);
8341869Sdes		return (NULL);
8441863Sdes	}
8567890Sdes	rf->fsize = st.st_size;
8637573Sdes
8760737Sume	rf->fd = open(path, rdonly ? O_RDONLY : O_RDWR);
8860737Sume	if (rf->fd < 0) {
8937573Sdes		free(rf);
9037573Sdes		return (NULL);
9137573Sdes	}
9237573Sdes	rf->buf = mmap(0, rf->fsize, PROT_READ, MAP_SHARED, rf->fd, 0);
9360188Sdes	if (rf->buf == MAP_FAILED) {
9455557Sdes		free(rf);
9563336Sdes		return (NULL);
9637573Sdes	}
9740975Sdes	rf->start = rf->buf;
9855557Sdes	rf->end = rf->buf + rf->fsize;
9937535Sdes	rf->blocksize = (blocksize == 0 ? rsync_chooseblocksize(rf->fsize) :
10055557Sdes	    blocksize);
10155557Sdes	rf->blockptr = rf->buf;
10255557Sdes	rf->blocknum = 0;
10337571Sdes	return (rf);
10455557Sdes}
10560707Sdes
10660707Sdes/* Close and free all resources related to an rsync file transfer. */
10755557Sdesint
10855557Sdesrsync_close(struct rsyncfile *rf)
10955557Sdes{
11060737Sume	int error;
11160737Sume
11260737Sume	error = munmap(rf->buf, rf->fsize);
11360737Sume	if (error)
11460737Sume		return (error);
11560737Sume	close(rf->fd);
11660737Sume	free(rf);
11760737Sume	return (0);
11860737Sume}
11960737Sume
12060737Sume/*
12160737Sume * Choose the most appropriate block size for an rsync transfer. Modeled
12260737Sume * algorithm after cvsup.
12360737Sume */
12460737Sumestatic size_t
12560737Sumersync_chooseblocksize(size_t fsize)
12660737Sume{
12760737Sume	size_t bestrem, blocksize, bs, hisearch, losearch, rem;
12860737Sume
12960737Sume	blocksize = fsize / MAXBLOCKS;
13060737Sume	losearch = blocksize - SEARCHREGION;
13137571Sdes	hisearch = blocksize + SEARCHREGION;
13255557Sdes
13337535Sdes	if (losearch < MINBLOCKSIZE) {
13437535Sdes		losearch = MINBLOCKSIZE;
13555557Sdes		hisearch = losearch + (2 * SEARCHREGION);
13637535Sdes	} else if (hisearch > MAXBLOCKSIZE) {
13762215Sdes		hisearch = MAXBLOCKSIZE;
13862215Sdes		losearch = hisearch - (2 * SEARCHREGION);
13962215Sdes	}
14062215Sdes
14162215Sdes	bestrem = MAXBLOCKSIZE;
14262215Sdes	for (bs = losearch; bs <= hisearch; bs++) {
14362215Sdes		rem = fsize % bs;
14462215Sdes		if (rem < bestrem) {
14562215Sdes			bestrem = rem;
14662215Sdes			blocksize = bs;
14762215Sdes		}
14862215Sdes	}
14955557Sdes	return (bestrem);
15055557Sdes}
15155557Sdes
15255557Sdes/* Get the next rsync block of a file. */
15337573Sdesint
15455557Sdesrsync_nextblock(struct rsyncfile *rf)
15563336Sdes{
15637535Sdes	MD5_CTX ctx;
15737571Sdes	size_t blocksize;
15837535Sdes
15955557Sdes	if (rf->blockptr >= rf->end)
16055557Sdes		return (0);
16155557Sdes	blocksize = min((size_t)(rf->end - rf->blockptr), rf->blocksize);
16255557Sdes	/* Calculate MD5 of the block. */
16355557Sdes	MD5_Init(&ctx);
16437535Sdes	MD5_Update(&ctx, rf->blockptr, blocksize);
16537535Sdes	MD5_End(rf->blockmd5, &ctx);
16637535Sdes
16737573Sdes	rf->rsum = rsync_rollsum(rf->blockptr, blocksize);
16837535Sdes	snprintf(rf->rsumstr, RSUM_SIZE, "%x", rf->rsum);
16937535Sdes	rf->blocknum++;
17055557Sdes	rf->blockptr += blocksize;
17137535Sdes	return (1);
17237573Sdes}
17362982Sdes
17455557Sdes/* Get the rolling checksum of a file. */
17555557Sdesstatic uint32_t
17637573Sdesrsync_rollsum(char *buf, size_t len)
17737573Sdes{
17862982Sdes	uint32_t a, b;
17955557Sdes	char *ptr, *limit;
18055557Sdes
18155557Sdes	a = b = 0;
18255557Sdes	ptr = buf;
18355557Sdes	limit = buf + len;
18455557Sdes
18555557Sdes	while (ptr < limit) {
18662982Sdes		a += *ptr + CHAR_OFFSET;
18762982Sdes		b += a;
18855557Sdes		ptr++;
18962982Sdes	}
19055557Sdes	return ((b << 16) | a);
19155557Sdes}
19255557Sdes
19355557Sdes/* Get running sum so far. */
19437571Sdeschar *
19555557Sdesrsync_rsum(struct rsyncfile *rf)
19637535Sdes{
19737535Sdes
19837535Sdes	return (rf->rsumstr);
19963340Sdes}
20063340Sdes
20163340Sdes/* Get MD5 of current block. */
20263340Sdeschar *
20363340Sdesrsync_blockmd5(struct rsyncfile *rf)
20463340Sdes{
20563340Sdes
20663340Sdes	return (rf->blockmd5);
20763340Sdes}
20863340Sdes
20963340Sdes/* Accessor for blocksize. */
21063340Sdessize_t
21163340Sdesrsync_blocksize(struct rsyncfile *rf)
21263340Sdes{
21363340Sdes
21463340Sdes	return (rf->blocksize);
21563340Sdes}
21663340Sdes
21763340Sdes/* Accessor for filesize. */
21863340Sdessize_t
21963340Sdesrsync_filesize(struct rsyncfile *rf)
22063340Sdes{
22163340Sdes
22263585Sdes	return (rf->fsize);
22363340Sdes}
22463340Sdes