1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright 2003-2005 Colin Percival
5 * All rights reserved
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted providing that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
25 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/11/usr.bin/bsdiff/bsdiff/bsdiff.c 330449 2018-03-05 07:26:05Z eadler $");
31
32#include <sys/types.h>
33
34#include <bzlib.h>
35#include <err.h>
36#include <errno.h>
37#include <fcntl.h>
38#include <limits.h>
39#include <stdint.h>
40#include <stdio.h>
41#include <stdlib.h>
42#include <string.h>
43#include <unistd.h>
44
45#ifndef O_BINARY
46#define O_BINARY 0
47#endif
48
49#include "divsufsort64.h"
50#define saidx_t saidx64_t
51#define divsufsort divsufsort64
52
53#define MIN(x,y) (((x)<(y)) ? (x) : (y))
54
55static off_t matchlen(u_char *old,off_t oldsize,u_char *new,off_t newsize)
56{
57	off_t i;
58
59	for(i=0;(i<oldsize)&&(i<newsize);i++)
60		if(old[i]!=new[i]) break;
61
62	return i;
63}
64
65static off_t search(off_t *I,u_char *old,off_t oldsize,
66		u_char *new,off_t newsize,off_t st,off_t en,off_t *pos)
67{
68	off_t x,y;
69
70	if(en-st<2) {
71		x=matchlen(old+I[st],oldsize-I[st],new,newsize);
72		y=matchlen(old+I[en],oldsize-I[en],new,newsize);
73
74		if(x>y) {
75			*pos=I[st];
76			return x;
77		} else {
78			*pos=I[en];
79			return y;
80		}
81	}
82
83	x=st+(en-st)/2;
84	if(memcmp(old+I[x],new,MIN(oldsize-I[x],newsize))<0) {
85		return search(I,old,oldsize,new,newsize,x,en,pos);
86	} else {
87		return search(I,old,oldsize,new,newsize,st,x,pos);
88	};
89}
90
91static void offtout(off_t x,u_char *buf)
92{
93	off_t y;
94
95	if(x<0) y=-x; else y=x;
96
97		buf[0]=y%256;y-=buf[0];
98	y=y/256;buf[1]=y%256;y-=buf[1];
99	y=y/256;buf[2]=y%256;y-=buf[2];
100	y=y/256;buf[3]=y%256;y-=buf[3];
101	y=y/256;buf[4]=y%256;y-=buf[4];
102	y=y/256;buf[5]=y%256;y-=buf[5];
103	y=y/256;buf[6]=y%256;y-=buf[6];
104	y=y/256;buf[7]=y%256;
105
106	if(x<0) buf[7]|=0x80;
107}
108
109static void
110usage(void)
111{
112
113	fprintf(stderr, "usage: bsdiff oldfile newfile patchfile\n");
114	exit(1);
115}
116
117int main(int argc,char *argv[])
118{
119	int fd;
120	u_char *old,*new;
121	off_t oldsize,newsize;
122	saidx_t *I;
123	off_t scan,pos,len;
124	off_t lastscan,lastpos,lastoffset;
125	off_t oldscore,scsc;
126	off_t s,Sf,lenf,Sb,lenb;
127	off_t overlap,Ss,lens;
128	off_t i;
129	off_t dblen,eblen;
130	u_char *db,*eb;
131	u_char buf[8];
132	u_char header[32];
133	FILE * pf;
134	BZFILE * pfbz2;
135	int bz2err;
136
137	if (argc != 4)
138		usage();
139
140	/* Allocate oldsize+1 bytes instead of oldsize bytes to ensure
141		that we never try to malloc(0) and get a NULL pointer */
142	if(((fd=open(argv[1],O_RDONLY|O_BINARY,0))<0) ||
143	    ((oldsize=lseek(fd,0,SEEK_END))==-1))
144		err(1, "%s", argv[1]);
145
146	if (oldsize > SSIZE_MAX ||
147	    (uintmax_t)oldsize >= SIZE_T_MAX / sizeof(off_t) ||
148	    oldsize == OFF_MAX) {
149		errno = EFBIG;
150		err(1, "%s", argv[1]);
151	}
152
153	if (((old=malloc(oldsize+1))==NULL) ||
154		(lseek(fd,0,SEEK_SET)!=0) ||
155		(read(fd,old,oldsize)!=oldsize) ||
156		(close(fd)==-1)) err(1,"%s",argv[1]);
157
158	if(((I=malloc((oldsize+1)*sizeof(saidx_t)))==NULL)) err(1,NULL);
159
160	if(divsufsort(old, I, oldsize)) err(1, "divsufsort");
161
162	/* Allocate newsize+1 bytes instead of newsize bytes to ensure
163		that we never try to malloc(0) and get a NULL pointer */
164	if(((fd=open(argv[2],O_RDONLY|O_BINARY,0))<0) ||
165	    ((newsize=lseek(fd,0,SEEK_END))==-1))
166		err(1, "%s", argv[2]);
167
168	if (newsize > SSIZE_MAX || (uintmax_t)newsize >= SIZE_T_MAX ||
169	    newsize == OFF_MAX) {
170		errno = EFBIG;
171		err(1, "%s", argv[2]);
172	}
173
174	if (((new=malloc(newsize+1))==NULL) ||
175		(lseek(fd,0,SEEK_SET)!=0) ||
176		(read(fd,new,newsize)!=newsize) ||
177		(close(fd)==-1)) err(1,"%s",argv[2]);
178
179	if(((db=malloc(newsize+1))==NULL) ||
180		((eb=malloc(newsize+1))==NULL)) err(1,NULL);
181	dblen=0;
182	eblen=0;
183
184	/* Create the patch file */
185	if ((pf = fopen(argv[3], "wb")) == NULL)
186		err(1, "%s", argv[3]);
187
188	/* Header is
189		0	8	 "BSDIFF40"
190		8	8	length of bzip2ed ctrl block
191		16	8	length of bzip2ed diff block
192		24	8	length of new file */
193	/* File is
194		0	32	Header
195		32	??	Bzip2ed ctrl block
196		??	??	Bzip2ed diff block
197		??	??	Bzip2ed extra block */
198	memcpy(header,"BSDIFF40",8);
199	offtout(0, header + 8);
200	offtout(0, header + 16);
201	offtout(newsize, header + 24);
202	if (fwrite(header, 32, 1, pf) != 1)
203		err(1, "fwrite(%s)", argv[3]);
204
205	/* Compute the differences, writing ctrl as we go */
206	if ((pfbz2 = BZ2_bzWriteOpen(&bz2err, pf, 9, 0, 0)) == NULL)
207		errx(1, "BZ2_bzWriteOpen, bz2err = %d", bz2err);
208	scan=0;len=0;pos=0;
209	lastscan=0;lastpos=0;lastoffset=0;
210	while(scan<newsize) {
211		oldscore=0;
212
213		for(scsc=scan+=len;scan<newsize;scan++) {
214			len=search(I,old,oldsize,new+scan,newsize-scan,
215					0,oldsize,&pos);
216
217			for(;scsc<scan+len;scsc++)
218			if((scsc+lastoffset<oldsize) &&
219				(old[scsc+lastoffset] == new[scsc]))
220				oldscore++;
221
222			if(((len==oldscore) && (len!=0)) ||
223				(len>oldscore+8)) break;
224
225			if((scan+lastoffset<oldsize) &&
226				(old[scan+lastoffset] == new[scan]))
227				oldscore--;
228		}
229
230		if((len!=oldscore) || (scan==newsize)) {
231			s=0;Sf=0;lenf=0;
232			for(i=0;(lastscan+i<scan)&&(lastpos+i<oldsize);) {
233				if(old[lastpos+i]==new[lastscan+i]) s++;
234				i++;
235				if(s*2-i>Sf*2-lenf) { Sf=s; lenf=i; }
236			}
237
238			lenb=0;
239			if(scan<newsize) {
240				s=0;Sb=0;
241				for(i=1;(scan>=lastscan+i)&&(pos>=i);i++) {
242					if(old[pos-i]==new[scan-i]) s++;
243					if(s*2-i>Sb*2-lenb) { Sb=s; lenb=i; }
244				}
245			}
246
247			if(lastscan+lenf>scan-lenb) {
248				overlap=(lastscan+lenf)-(scan-lenb);
249				s=0;Ss=0;lens=0;
250				for(i=0;i<overlap;i++) {
251					if(new[lastscan+lenf-overlap+i]==
252					   old[lastpos+lenf-overlap+i]) s++;
253					if(new[scan-lenb+i]==
254					   old[pos-lenb+i]) s--;
255					if(s>Ss) { Ss=s; lens=i+1; }
256				}
257
258				lenf+=lens-overlap;
259				lenb-=lens;
260			}
261
262			for(i=0;i<lenf;i++)
263				db[dblen+i]=new[lastscan+i]-old[lastpos+i];
264			for(i=0;i<(scan-lenb)-(lastscan+lenf);i++)
265				eb[eblen+i]=new[lastscan+lenf+i];
266
267			dblen+=lenf;
268			eblen+=(scan-lenb)-(lastscan+lenf);
269
270			offtout(lenf,buf);
271			BZ2_bzWrite(&bz2err, pfbz2, buf, 8);
272			if (bz2err != BZ_OK)
273				errx(1, "BZ2_bzWrite, bz2err = %d", bz2err);
274
275			offtout((scan-lenb)-(lastscan+lenf),buf);
276			BZ2_bzWrite(&bz2err, pfbz2, buf, 8);
277			if (bz2err != BZ_OK)
278				errx(1, "BZ2_bzWrite, bz2err = %d", bz2err);
279
280			offtout((pos-lenb)-(lastpos+lenf),buf);
281			BZ2_bzWrite(&bz2err, pfbz2, buf, 8);
282			if (bz2err != BZ_OK)
283				errx(1, "BZ2_bzWrite, bz2err = %d", bz2err);
284
285			lastscan=scan-lenb;
286			lastpos=pos-lenb;
287			lastoffset=pos-scan;
288		}
289	}
290	BZ2_bzWriteClose(&bz2err, pfbz2, 0, NULL, NULL);
291	if (bz2err != BZ_OK)
292		errx(1, "BZ2_bzWriteClose, bz2err = %d", bz2err);
293
294	/* Compute size of compressed ctrl data */
295	if ((len = ftello(pf)) == -1)
296		err(1, "ftello");
297	offtout(len-32, header + 8);
298
299	/* Write compressed diff data */
300	if ((pfbz2 = BZ2_bzWriteOpen(&bz2err, pf, 9, 0, 0)) == NULL)
301		errx(1, "BZ2_bzWriteOpen, bz2err = %d", bz2err);
302	BZ2_bzWrite(&bz2err, pfbz2, db, dblen);
303	if (bz2err != BZ_OK)
304		errx(1, "BZ2_bzWrite, bz2err = %d", bz2err);
305	BZ2_bzWriteClose(&bz2err, pfbz2, 0, NULL, NULL);
306	if (bz2err != BZ_OK)
307		errx(1, "BZ2_bzWriteClose, bz2err = %d", bz2err);
308
309	/* Compute size of compressed diff data */
310	if ((newsize = ftello(pf)) == -1)
311		err(1, "ftello");
312	offtout(newsize - len, header + 16);
313
314	/* Write compressed extra data */
315	if ((pfbz2 = BZ2_bzWriteOpen(&bz2err, pf, 9, 0, 0)) == NULL)
316		errx(1, "BZ2_bzWriteOpen, bz2err = %d", bz2err);
317	BZ2_bzWrite(&bz2err, pfbz2, eb, eblen);
318	if (bz2err != BZ_OK)
319		errx(1, "BZ2_bzWrite, bz2err = %d", bz2err);
320	BZ2_bzWriteClose(&bz2err, pfbz2, 0, NULL, NULL);
321	if (bz2err != BZ_OK)
322		errx(1, "BZ2_bzWriteClose, bz2err = %d", bz2err);
323
324	/* Seek to the beginning, write the header, and close the file */
325	if (fseeko(pf, 0, SEEK_SET))
326		err(1, "fseeko");
327	if (fwrite(header, 32, 1, pf) != 1)
328		err(1, "fwrite(%s)", argv[3]);
329	if (fclose(pf))
330		err(1, "fclose");
331
332	/* Free the memory we used */
333	free(db);
334	free(eb);
335	free(I);
336	free(old);
337	free(new);
338
339	return 0;
340}
341