/* rsyncfile.c revision 186743 */
1235633Sdim/*- 2235633Sdim * Copyright (c) 2008-2009, Ulf Lilleengen <lulf@FreeBSD.org> 3235633Sdim * All rights reserved. 4235633Sdim * 5235633Sdim * Redistribution and use in source and binary forms, with or without 6235633Sdim * modification, are permitted provided that the following conditions 7235633Sdim * are met: 8235633Sdim * 1. Redistributions of source code must retain the above copyright 9235633Sdim * notice, this list of conditions and the following disclaimer. 10235633Sdim * 2. Redistributions in binary form must reproduce the above copyright 11235633Sdim * notice, this list of conditions and the following disclaimer in the 12235633Sdim * documentation and/or other materials provided with the distribution. 13235633Sdim * 14235633Sdim * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15235633Sdim * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16226584Sdim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17235633Sdim * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18226584Sdim * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19226584Sdim * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20226584Sdim * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21235633Sdim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22226584Sdim * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23226584Sdim * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24226584Sdim * SUCH DAMAGE. 
25226584Sdim * 26235633Sdim * $FreeBSD: projects/csup_cvsmode/contrib/csup/rsyncfile.c 186743 2009-01-04 17:31:01Z lulf $ 27226584Sdim */ 28226584Sdim 29235633Sdim#include <errno.h> 30235633Sdim#include <string.h> 31235633Sdim#include <stdlib.h> 32235633Sdim#include <stdio.h> 33235633Sdim 34235633Sdim#include <sys/types.h> 35235633Sdim#include <sys/stat.h> 36235633Sdim#include <sys/mman.h> 37235633Sdim#include <fcntl.h> 38245431Sdim#include <unistd.h> 39235633Sdim 40252723Sdim#include "misc.h" 41245431Sdim#include "rsyncfile.h" 42245431Sdim 43245431Sdim#define MINBLOCKSIZE 1024 44245431Sdim#define MAXBLOCKSIZE (16 * 1024) 45245431Sdim#define RECEIVEBUFFERSIZE (15 * 1024) 46245431Sdim#define BLOCKINFOSIZE 26 47245431Sdim#define SEARCHREGION 10 48263509Sdim#define MAXBLOCKS (RECEIVEBUFFERSIZE / BLOCKINFOSIZE) 49263509Sdim 50263509Sdim#define CHAR_OFFSET 3 51263509Sdim#define RSUM_SIZE 9 52235633Sdim 53235633Sdimstruct rsyncfile { 54235633Sdim char *start; 55235633Sdim char *buf; 56235633Sdim char *end; 57235633Sdim size_t blocksize; 58235633Sdim size_t fsize; 59235633Sdim int fd; 60235633Sdim 61235633Sdim char *blockptr; 62235633Sdim int blocknum; 63235633Sdim char blockmd5[MD5_DIGEST_SIZE]; 64235633Sdim char rsumstr[RSUM_SIZE]; 65235633Sdim uint32_t rsum; 66235633Sdim}; 67235633Sdim 68235633Sdimstatic size_t rsync_chooseblocksize(size_t); 69235633Sdimstatic uint32_t rsync_rollsum(char *, size_t); 70245431Sdim 71245431Sdim/* Open a file and initialize variable for rsync operation. */ 72263509Sdimstruct rsyncfile * 73245431Sdimrsync_open(char *path, size_t blocksize, int rdonly) 74235633Sdim{ 75235633Sdim struct rsyncfile *rf; 76245431Sdim struct stat st; 77245431Sdim int error; 78245431Sdim 79245431Sdim rf = xmalloc(sizeof(*rf)); 80245431Sdim error = stat(path, &st); 81245431Sdim if (error) { 82245431Sdim free(rf); 83245431Sdim return (NULL); 84263509Sdim } 85263509Sdim rf->fsize = st.st_size; 86263509Sdim 87263509Sdim rf->fd = open(path, rdonly ? 
O_RDONLY : O_RDWR); 88263509Sdim if (rf->fd < 0) { 89263509Sdim free(rf); 90263509Sdim return (NULL); 91235633Sdim } 92235633Sdim rf->buf = mmap(0, rf->fsize, PROT_READ, MAP_SHARED, rf->fd, 0); 93235633Sdim if (rf->buf == MAP_FAILED) { 94235633Sdim free(rf); 95235633Sdim return (NULL); 96226584Sdim } 97235633Sdim rf->start = rf->buf; 98235633Sdim rf->end = rf->buf + rf->fsize; 99235633Sdim rf->blocksize = (blocksize == 0 ? rsync_chooseblocksize(rf->fsize) : 100235633Sdim blocksize); 101226584Sdim rf->blockptr = rf->buf; 102226584Sdim rf->blocknum = 0; 103235633Sdim return (rf); 104235633Sdim} 105235633Sdim 106226584Sdim/* Close and free all resources related to an rsync file transfer. */ 107235633Sdimint 108245431Sdimrsync_close(struct rsyncfile *rf) 109245431Sdim{ 110226584Sdim int error; 111226584Sdim 112245431Sdim error = munmap(rf->buf, rf->fsize); 113226584Sdim if (error) 114226584Sdim return (error); 115235633Sdim close(rf->fd); 116226584Sdim free(rf); 117235633Sdim return (0); 118235633Sdim} 119235633Sdim 120235633Sdim/* 121235633Sdim * Choose the most appropriate block size for an rsync transfer. Modeled 122235633Sdim * algorithm after cvsup. 
123235633Sdim */ 124235633Sdimstatic size_t 125235633Sdimrsync_chooseblocksize(size_t fsize) 126235633Sdim{ 127235633Sdim size_t bestrem, blocksize, bs, hisearch, losearch, rem; 128235633Sdim 129235633Sdim blocksize = fsize / MAXBLOCKS; 130235633Sdim losearch = blocksize - SEARCHREGION; 131235633Sdim hisearch = blocksize + SEARCHREGION; 132235633Sdim 133235633Sdim if (losearch < MINBLOCKSIZE) { 134235633Sdim losearch = MINBLOCKSIZE; 135235633Sdim hisearch = losearch + (2 * SEARCHREGION); 136235633Sdim } else if (hisearch > MAXBLOCKSIZE) { 137235633Sdim hisearch = MAXBLOCKSIZE; 138235633Sdim losearch = hisearch - (2 * SEARCHREGION); 139235633Sdim } 140235633Sdim 141235633Sdim bestrem = MAXBLOCKSIZE; 142235633Sdim for (bs = losearch; bs <= hisearch; bs++) { 143235633Sdim rem = fsize % bs; 144235633Sdim if (rem < bestrem) { 145235633Sdim bestrem = rem; 146235633Sdim blocksize = bs; 147235633Sdim } 148235633Sdim } 149235633Sdim return (bestrem); 150245431Sdim} 151245431Sdim 152235633Sdim/* Get the next rsync block of a file. */ 153235633Sdimint 154235633Sdimrsync_nextblock(struct rsyncfile *rf) 155235633Sdim{ 156235633Sdim MD5_CTX ctx; 157235633Sdim size_t blocksize; 158235633Sdim 159226584Sdim if (rf->blockptr >= rf->end) 160226584Sdim return (0); 161235633Sdim blocksize = min((size_t)(rf->end - rf->blockptr), rf->blocksize); 162235633Sdim /* Calculate MD5 of the block. */ 163235633Sdim MD5_Init(&ctx); 164235633Sdim MD5_Update(&ctx, rf->blockptr, blocksize); 165235633Sdim MD5_End(rf->blockmd5, &ctx); 166235633Sdim 167235633Sdim rf->rsum = rsync_rollsum(rf->blockptr, blocksize); 168235633Sdim snprintf(rf->rsumstr, RSUM_SIZE, "%x", rf->rsum); 169235633Sdim rf->blocknum++; 170235633Sdim rf->blockptr += blocksize; 171235633Sdim return (1); 172235633Sdim} 173235633Sdim 174235633Sdim/* Get the rolling checksum of a file. 
*/ 175235633Sdimstatic uint32_t 176235633Sdimrsync_rollsum(char *buf, size_t len) 177235633Sdim{ 178235633Sdim uint32_t a, b; 179235633Sdim char *ptr, *limit; 180235633Sdim 181235633Sdim a = b = 0; 182235633Sdim ptr = buf; 183235633Sdim limit = buf + len; 184235633Sdim 185235633Sdim while (ptr < limit) { 186235633Sdim a += *ptr + CHAR_OFFSET; 187235633Sdim b += a; 188235633Sdim ptr++; 189235633Sdim } 190235633Sdim return ((b << 16) | a); 191235633Sdim} 192245431Sdim 193245431Sdim/* Get running sum so far. */ 194245431Sdimchar * 195245431Sdimrsync_rsum(struct rsyncfile *rf) 196245431Sdim{ 197245431Sdim 198245431Sdim return (rf->rsumstr); 199245431Sdim} 200245431Sdim 201245431Sdim/* Get MD5 of current block. */ 202245431Sdimchar * 203263509Sdimrsync_blockmd5(struct rsyncfile *rf) 204263509Sdim{ 205263509Sdim 206263509Sdim return (rf->blockmd5); 207263509Sdim} 208263509Sdim 209263509Sdim/* Accessor for blocksize. */ 210263509Sdimsize_t 211263509Sdimrsync_blocksize(struct rsyncfile *rf) 212263509Sdim{ 213263509Sdim 214263509Sdim return (rf->blocksize); 215263509Sdim} 216263509Sdim 217235633Sdim/* Accessor for filesize. */ 218235633Sdimsize_t 219235633Sdimrsync_filesize(struct rsyncfile *rf) 220235633Sdim{ 221235633Sdim 222235633Sdim return (rf->fsize); 223235633Sdim} 224235633Sdim