1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright 2003-2005 Colin Percival
5 * All rights reserved
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted providing that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
25 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/types.h>
30
31#include <bzlib.h>
32#include <err.h>
33#include <errno.h>
34#include <fcntl.h>
35#include <limits.h>
36#include <stdint.h>
37#include <stdio.h>
38#include <stdlib.h>
39#include <string.h>
40#include <unistd.h>
41#include <sys/mman.h>
42
43#ifndef O_BINARY
44#define O_BINARY 0
45#endif
46
47#include "divsufsort64.h"
48#define saidx_t saidx64_t
49#define divsufsort divsufsort64
50
51#define MIN(x,y) (((x)<(y)) ? (x) : (y))
52
53static off_t matchlen(u_char *old,off_t oldsize,u_char *new,off_t newsize)
54{
55	off_t i;
56
57	for(i=0;(i<oldsize)&&(i<newsize);i++)
58		if(old[i]!=new[i]) break;
59
60	return i;
61}
62
63static off_t search(off_t *I,u_char *old,off_t oldsize,
64		u_char *new,off_t newsize,off_t st,off_t en,off_t *pos)
65{
66	off_t x,y;
67
68	if(en-st<2) {
69		x=matchlen(old+I[st],oldsize-I[st],new,newsize);
70		y=matchlen(old+I[en],oldsize-I[en],new,newsize);
71
72		if(x>y) {
73			*pos=I[st];
74			return x;
75		} else {
76			*pos=I[en];
77			return y;
78		}
79	}
80
81	x=st+(en-st)/2;
82	if(memcmp(old+I[x],new,MIN(oldsize-I[x],newsize))<0) {
83		return search(I,old,oldsize,new,newsize,x,en,pos);
84	} else {
85		return search(I,old,oldsize,new,newsize,st,x,pos);
86	};
87}
88
89static void offtout(off_t x,u_char *buf)
90{
91	off_t y;
92
93	if(x<0) y=-x; else y=x;
94
95		buf[0]=y%256;y-=buf[0];
96	y=y/256;buf[1]=y%256;y-=buf[1];
97	y=y/256;buf[2]=y%256;y-=buf[2];
98	y=y/256;buf[3]=y%256;y-=buf[3];
99	y=y/256;buf[4]=y%256;y-=buf[4];
100	y=y/256;buf[5]=y%256;y-=buf[5];
101	y=y/256;buf[6]=y%256;y-=buf[6];
102	y=y/256;buf[7]=y%256;
103
104	if(x<0) buf[7]|=0x80;
105}
106
107static void
108usage(void)
109{
110
111	fprintf(stderr, "usage: bsdiff oldfile newfile patchfile\n");
112	exit(1);
113}
114
115int main(int argc,char *argv[])
116{
117	int fd;
118	u_char *old,*new;
119	off_t oldsize,newsize,xnewsize;
120	saidx_t *I;
121	off_t scan,pos,len;
122	off_t lastscan,lastpos,lastoffset;
123	off_t oldscore,scsc;
124	off_t s,Sf,lenf,Sb,lenb;
125	off_t overlap,Ss,lens;
126	off_t i;
127	off_t dblen,eblen;
128	u_char *db,*eb;
129	u_char buf[8];
130	u_char header[32];
131	FILE * pf;
132	BZFILE * pfbz2;
133	int bz2err;
134
135	if (argc != 4)
136		usage();
137
138	/* Allocate oldsize+1 bytes instead of oldsize bytes to ensure
139		that we never try to malloc(0) and get a NULL pointer */
140	if(((fd=open(argv[1],O_RDONLY|O_BINARY,0))<0) ||
141	    ((oldsize=lseek(fd,0,SEEK_END))==-1))
142		err(1, "%s", argv[1]);
143
144	if (oldsize > SSIZE_MAX ||
145	    (uintmax_t)oldsize >= SIZE_T_MAX / sizeof(off_t) ||
146	    oldsize == OFF_MAX) {
147		errno = EFBIG;
148		err(1, "%s", argv[1]);
149	}
150
151	old = mmap(NULL, oldsize+1, PROT_READ, MAP_SHARED, fd, 0);
152	if (old == MAP_FAILED || close(fd) == -1)
153		err(1, "%s", argv[1]);
154
155	if(((I=malloc((oldsize+1)*sizeof(saidx_t)))==NULL)) err(1,NULL);
156
157	if(divsufsort(old, I, oldsize)) err(1, "divsufsort");
158
159	/* Allocate newsize+1 bytes instead of newsize bytes to ensure
160		that we never try to malloc(0) and get a NULL pointer */
161	if(((fd=open(argv[2],O_RDONLY|O_BINARY,0))<0) ||
162	    ((newsize=lseek(fd,0,SEEK_END))==-1))
163		err(1, "%s", argv[2]);
164
165	if (newsize > SSIZE_MAX || (uintmax_t)newsize >= SIZE_T_MAX ||
166	    newsize == OFF_MAX) {
167		errno = EFBIG;
168		err(1, "%s", argv[2]);
169	}
170
171	new = mmap(NULL, newsize+1, PROT_READ, MAP_SHARED, fd, 0);
172	if (new == MAP_FAILED || close(fd) == -1)
173		err(1, "%s", argv[2]);
174
175	if(((db=malloc(newsize+1))==NULL) ||
176		((eb=malloc(newsize+1))==NULL)) err(1,NULL);
177	dblen=0;
178	eblen=0;
179
180	/* Create the patch file */
181	if ((pf = fopen(argv[3], "wb")) == NULL)
182		err(1, "%s", argv[3]);
183
184	/* Header is
185		0	8	 "BSDIFF40"
186		8	8	length of bzip2ed ctrl block
187		16	8	length of bzip2ed diff block
188		24	8	length of new file */
189	/* File is
190		0	32	Header
191		32	??	Bzip2ed ctrl block
192		??	??	Bzip2ed diff block
193		??	??	Bzip2ed extra block */
194	memcpy(header,"BSDIFF40",8);
195	offtout(0, header + 8);
196	offtout(0, header + 16);
197	offtout(newsize, header + 24);
198	if (fwrite(header, 32, 1, pf) != 1)
199		err(1, "fwrite(%s)", argv[3]);
200
201	/* Compute the differences, writing ctrl as we go */
202	if ((pfbz2 = BZ2_bzWriteOpen(&bz2err, pf, 9, 0, 0)) == NULL)
203		errx(1, "BZ2_bzWriteOpen, bz2err = %d", bz2err);
204	scan=0;len=0;pos=0;
205	lastscan=0;lastpos=0;lastoffset=0;
206	while(scan<newsize) {
207		oldscore=0;
208
209		for(scsc=scan+=len;scan<newsize;scan++) {
210			len=search(I,old,oldsize,new+scan,newsize-scan,
211					0,oldsize-1,&pos);
212
213			for(;scsc<scan+len;scsc++)
214			if((scsc+lastoffset<oldsize) &&
215				(old[scsc+lastoffset] == new[scsc]))
216				oldscore++;
217
218			if(((len==oldscore) && (len!=0)) ||
219				(len>oldscore+8)) break;
220
221			if((scan+lastoffset<oldsize) &&
222				(old[scan+lastoffset] == new[scan]))
223				oldscore--;
224		}
225
226		if((len!=oldscore) || (scan==newsize)) {
227			s=0;Sf=0;lenf=0;
228			for(i=0;(lastscan+i<scan)&&(lastpos+i<oldsize);) {
229				if(old[lastpos+i]==new[lastscan+i]) s++;
230				i++;
231				if(s*2-i>Sf*2-lenf) { Sf=s; lenf=i; }
232			}
233
234			lenb=0;
235			if(scan<newsize) {
236				s=0;Sb=0;
237				for(i=1;(scan>=lastscan+i)&&(pos>=i);i++) {
238					if(old[pos-i]==new[scan-i]) s++;
239					if(s*2-i>Sb*2-lenb) { Sb=s; lenb=i; }
240				}
241			}
242
243			if(lastscan+lenf>scan-lenb) {
244				overlap=(lastscan+lenf)-(scan-lenb);
245				s=0;Ss=0;lens=0;
246				for(i=0;i<overlap;i++) {
247					if(new[lastscan+lenf-overlap+i]==
248					   old[lastpos+lenf-overlap+i]) s++;
249					if(new[scan-lenb+i]==
250					   old[pos-lenb+i]) s--;
251					if(s>Ss) { Ss=s; lens=i+1; }
252				}
253
254				lenf+=lens-overlap;
255				lenb-=lens;
256			}
257
258			for(i=0;i<lenf;i++)
259				db[dblen+i]=new[lastscan+i]-old[lastpos+i];
260			for(i=0;i<(scan-lenb)-(lastscan+lenf);i++)
261				eb[eblen+i]=new[lastscan+lenf+i];
262
263			dblen+=lenf;
264			eblen+=(scan-lenb)-(lastscan+lenf);
265
266			offtout(lenf,buf);
267			BZ2_bzWrite(&bz2err, pfbz2, buf, 8);
268			if (bz2err != BZ_OK)
269				errx(1, "BZ2_bzWrite, bz2err = %d", bz2err);
270
271			offtout((scan-lenb)-(lastscan+lenf),buf);
272			BZ2_bzWrite(&bz2err, pfbz2, buf, 8);
273			if (bz2err != BZ_OK)
274				errx(1, "BZ2_bzWrite, bz2err = %d", bz2err);
275
276			offtout((pos-lenb)-(lastpos+lenf),buf);
277			BZ2_bzWrite(&bz2err, pfbz2, buf, 8);
278			if (bz2err != BZ_OK)
279				errx(1, "BZ2_bzWrite, bz2err = %d", bz2err);
280
281			lastscan=scan-lenb;
282			lastpos=pos-lenb;
283			lastoffset=pos-scan;
284		}
285	}
286	BZ2_bzWriteClose(&bz2err, pfbz2, 0, NULL, NULL);
287	if (bz2err != BZ_OK)
288		errx(1, "BZ2_bzWriteClose, bz2err = %d", bz2err);
289
290	/* Compute size of compressed ctrl data */
291	if ((len = ftello(pf)) == -1)
292		err(1, "ftello");
293	offtout(len-32, header + 8);
294
295	/* Write compressed diff data */
296	if ((pfbz2 = BZ2_bzWriteOpen(&bz2err, pf, 9, 0, 0)) == NULL)
297		errx(1, "BZ2_bzWriteOpen, bz2err = %d", bz2err);
298	BZ2_bzWrite(&bz2err, pfbz2, db, dblen);
299	if (bz2err != BZ_OK)
300		errx(1, "BZ2_bzWrite, bz2err = %d", bz2err);
301	BZ2_bzWriteClose(&bz2err, pfbz2, 0, NULL, NULL);
302	if (bz2err != BZ_OK)
303		errx(1, "BZ2_bzWriteClose, bz2err = %d", bz2err);
304
305	/* Compute size of compressed diff data */
306	if ((xnewsize = ftello(pf)) == -1)
307		err(1, "ftello");
308	offtout(xnewsize - len, header + 16);
309
310	/* Write compressed extra data */
311	if ((pfbz2 = BZ2_bzWriteOpen(&bz2err, pf, 9, 0, 0)) == NULL)
312		errx(1, "BZ2_bzWriteOpen, bz2err = %d", bz2err);
313	BZ2_bzWrite(&bz2err, pfbz2, eb, eblen);
314	if (bz2err != BZ_OK)
315		errx(1, "BZ2_bzWrite, bz2err = %d", bz2err);
316	BZ2_bzWriteClose(&bz2err, pfbz2, 0, NULL, NULL);
317	if (bz2err != BZ_OK)
318		errx(1, "BZ2_bzWriteClose, bz2err = %d", bz2err);
319
320	/* Seek to the beginning, write the header, and close the file */
321	if (fseeko(pf, 0, SEEK_SET))
322		err(1, "fseeko");
323	if (fwrite(header, 32, 1, pf) != 1)
324		err(1, "fwrite(%s)", argv[3]);
325	if (fclose(pf))
326		err(1, "fclose");
327
328	/* Free the memory we used */
329	free(db);
330	free(eb);
331	free(I);
332	munmap(old, oldsize+1);
333	munmap(new, newsize+1);
334
335	return 0;
336}
337