1148771Scperciva/*-
2330449Seadler * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3330449Seadler *
4148771Scperciva * Copyright 2003-2005 Colin Percival
5148771Scperciva * All rights reserved
6148771Scperciva *
7148771Scperciva * Redistribution and use in source and binary forms, with or without
8148771Scperciva * modification, are permitted providing that the following conditions
9148771Scperciva * are met:
10148771Scperciva * 1. Redistributions of source code must retain the above copyright
11148771Scperciva *    notice, this list of conditions and the following disclaimer.
12148771Scperciva * 2. Redistributions in binary form must reproduce the above copyright
13148771Scperciva *    notice, this list of conditions and the following disclaimer in the
14148771Scperciva *    documentation and/or other materials provided with the distribution.
15148771Scperciva *
16148771Scperciva * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17148771Scperciva * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18148771Scperciva * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19148771Scperciva * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
20148771Scperciva * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21148771Scperciva * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22148771Scperciva * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23148771Scperciva * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
24148771Scperciva * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
25148771Scperciva * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26148771Scperciva * POSSIBILITY OF SUCH DAMAGE.
27148771Scperciva */
28148771Scperciva
29148771Scperciva#include <sys/cdefs.h>
30148771Scperciva__FBSDID("$FreeBSD: stable/11/usr.bin/bsdiff/bsdiff/bsdiff.c 330449 2018-03-05 07:26:05Z eadler $");
31148771Scperciva
32148771Scperciva#include <sys/types.h>
33148771Scperciva
34148771Scperciva#include <bzlib.h>
35148771Scperciva#include <err.h>
36290329Sache#include <errno.h>
37148771Scperciva#include <fcntl.h>
38290329Sache#include <limits.h>
39290329Sache#include <stdint.h>
40148771Scperciva#include <stdio.h>
41148771Scperciva#include <stdlib.h>
42148771Scperciva#include <string.h>
43148771Scperciva#include <unistd.h>
44148771Scperciva
45164922Scperciva#ifndef O_BINARY
46164922Scperciva#define O_BINARY 0
47164922Scperciva#endif
48164922Scperciva
49309846Sdelphij#include "divsufsort64.h"
50309846Sdelphij#define saidx_t saidx64_t
51309846Sdelphij#define divsufsort divsufsort64
52309846Sdelphij
53148771Scperciva#define MIN(x,y) (((x)<(y)) ? (x) : (y))
54148771Scperciva
55148771Scpercivastatic off_t matchlen(u_char *old,off_t oldsize,u_char *new,off_t newsize)
56148771Scperciva{
57148771Scperciva	off_t i;
58148771Scperciva
59148771Scperciva	for(i=0;(i<oldsize)&&(i<newsize);i++)
60148771Scperciva		if(old[i]!=new[i]) break;
61148771Scperciva
62148771Scperciva	return i;
63148771Scperciva}
64148771Scperciva
65148771Scpercivastatic off_t search(off_t *I,u_char *old,off_t oldsize,
66148771Scperciva		u_char *new,off_t newsize,off_t st,off_t en,off_t *pos)
67148771Scperciva{
68148771Scperciva	off_t x,y;
69148771Scperciva
70148771Scperciva	if(en-st<2) {
71148771Scperciva		x=matchlen(old+I[st],oldsize-I[st],new,newsize);
72148771Scperciva		y=matchlen(old+I[en],oldsize-I[en],new,newsize);
73148771Scperciva
74148771Scperciva		if(x>y) {
75148771Scperciva			*pos=I[st];
76148771Scperciva			return x;
77148771Scperciva		} else {
78148771Scperciva			*pos=I[en];
79148771Scperciva			return y;
80148771Scperciva		}
81298089Spfg	}
82148771Scperciva
83148771Scperciva	x=st+(en-st)/2;
84148771Scperciva	if(memcmp(old+I[x],new,MIN(oldsize-I[x],newsize))<0) {
85148771Scperciva		return search(I,old,oldsize,new,newsize,x,en,pos);
86148771Scperciva	} else {
87148771Scperciva		return search(I,old,oldsize,new,newsize,st,x,pos);
88148771Scperciva	};
89148771Scperciva}
90148771Scperciva
91148771Scpercivastatic void offtout(off_t x,u_char *buf)
92148771Scperciva{
93148771Scperciva	off_t y;
94148771Scperciva
95148771Scperciva	if(x<0) y=-x; else y=x;
96148771Scperciva
97148771Scperciva		buf[0]=y%256;y-=buf[0];
98148771Scperciva	y=y/256;buf[1]=y%256;y-=buf[1];
99148771Scperciva	y=y/256;buf[2]=y%256;y-=buf[2];
100148771Scperciva	y=y/256;buf[3]=y%256;y-=buf[3];
101148771Scperciva	y=y/256;buf[4]=y%256;y-=buf[4];
102148771Scperciva	y=y/256;buf[5]=y%256;y-=buf[5];
103148771Scperciva	y=y/256;buf[6]=y%256;y-=buf[6];
104148771Scperciva	y=y/256;buf[7]=y%256;
105148771Scperciva
106148771Scperciva	if(x<0) buf[7]|=0x80;
107148771Scperciva}
108148771Scperciva
109264823Sedstatic void
110264823Sedusage(void)
111264823Sed{
112264823Sed
113264823Sed	fprintf(stderr, "usage: bsdiff oldfile newfile patchfile\n");
114264823Sed	exit(1);
115264823Sed}
116264823Sed
117148771Scpercivaint main(int argc,char *argv[])
118148771Scperciva{
119148771Scperciva	int fd;
120148771Scperciva	u_char *old,*new;
121148771Scperciva	off_t oldsize,newsize;
122309846Sdelphij	saidx_t *I;
123148771Scperciva	off_t scan,pos,len;
124148771Scperciva	off_t lastscan,lastpos,lastoffset;
125148771Scperciva	off_t oldscore,scsc;
126148771Scperciva	off_t s,Sf,lenf,Sb,lenb;
127148771Scperciva	off_t overlap,Ss,lens;
128148771Scperciva	off_t i;
129148771Scperciva	off_t dblen,eblen;
130148771Scperciva	u_char *db,*eb;
131148771Scperciva	u_char buf[8];
132148771Scperciva	u_char header[32];
133148771Scperciva	FILE * pf;
134148771Scperciva	BZFILE * pfbz2;
135148771Scperciva	int bz2err;
136148771Scperciva
137264823Sed	if (argc != 4)
138264823Sed		usage();
139148771Scperciva
140148771Scperciva	/* Allocate oldsize+1 bytes instead of oldsize bytes to ensure
141148771Scperciva		that we never try to malloc(0) and get a NULL pointer */
142164922Scperciva	if(((fd=open(argv[1],O_RDONLY|O_BINARY,0))<0) ||
143290329Sache	    ((oldsize=lseek(fd,0,SEEK_END))==-1))
144290329Sache		err(1, "%s", argv[1]);
145290329Sache
146290329Sache	if (oldsize > SSIZE_MAX ||
147290336Sache	    (uintmax_t)oldsize >= SIZE_T_MAX / sizeof(off_t) ||
148290336Sache	    oldsize == OFF_MAX) {
149290329Sache		errno = EFBIG;
150290329Sache		err(1, "%s", argv[1]);
151290329Sache	}
152290329Sache
153290329Sache	if (((old=malloc(oldsize+1))==NULL) ||
154148771Scperciva		(lseek(fd,0,SEEK_SET)!=0) ||
155148771Scperciva		(read(fd,old,oldsize)!=oldsize) ||
156148771Scperciva		(close(fd)==-1)) err(1,"%s",argv[1]);
157148771Scperciva
158309846Sdelphij	if(((I=malloc((oldsize+1)*sizeof(saidx_t)))==NULL)) err(1,NULL);
159148771Scperciva
160309846Sdelphij	if(divsufsort(old, I, oldsize)) err(1, "divsufsort");
161148771Scperciva
162148771Scperciva	/* Allocate newsize+1 bytes instead of newsize bytes to ensure
163148771Scperciva		that we never try to malloc(0) and get a NULL pointer */
164164922Scperciva	if(((fd=open(argv[2],O_RDONLY|O_BINARY,0))<0) ||
165290329Sache	    ((newsize=lseek(fd,0,SEEK_END))==-1))
166290329Sache		err(1, "%s", argv[2]);
167290329Sache
168290336Sache	if (newsize > SSIZE_MAX || (uintmax_t)newsize >= SIZE_T_MAX ||
169290336Sache	    newsize == OFF_MAX) {
170290329Sache		errno = EFBIG;
171290329Sache		err(1, "%s", argv[2]);
172290329Sache	}
173290329Sache
174290329Sache	if (((new=malloc(newsize+1))==NULL) ||
175148771Scperciva		(lseek(fd,0,SEEK_SET)!=0) ||
176148771Scperciva		(read(fd,new,newsize)!=newsize) ||
177148771Scperciva		(close(fd)==-1)) err(1,"%s",argv[2]);
178148771Scperciva
179148771Scperciva	if(((db=malloc(newsize+1))==NULL) ||
180148771Scperciva		((eb=malloc(newsize+1))==NULL)) err(1,NULL);
181148771Scperciva	dblen=0;
182148771Scperciva	eblen=0;
183148771Scperciva
184148771Scperciva	/* Create the patch file */
185164922Scperciva	if ((pf = fopen(argv[3], "wb")) == NULL)
186148771Scperciva		err(1, "%s", argv[3]);
187148771Scperciva
188148771Scperciva	/* Header is
189148771Scperciva		0	8	 "BSDIFF40"
190148771Scperciva		8	8	length of bzip2ed ctrl block
191148771Scperciva		16	8	length of bzip2ed diff block
192148771Scperciva		24	8	length of new file */
193148771Scperciva	/* File is
194148771Scperciva		0	32	Header
195148771Scperciva		32	??	Bzip2ed ctrl block
196148771Scperciva		??	??	Bzip2ed diff block
197148771Scperciva		??	??	Bzip2ed extra block */
198148771Scperciva	memcpy(header,"BSDIFF40",8);
199148771Scperciva	offtout(0, header + 8);
200148771Scperciva	offtout(0, header + 16);
201148771Scperciva	offtout(newsize, header + 24);
202148771Scperciva	if (fwrite(header, 32, 1, pf) != 1)
203148771Scperciva		err(1, "fwrite(%s)", argv[3]);
204148771Scperciva
205148771Scperciva	/* Compute the differences, writing ctrl as we go */
206148771Scperciva	if ((pfbz2 = BZ2_bzWriteOpen(&bz2err, pf, 9, 0, 0)) == NULL)
207148771Scperciva		errx(1, "BZ2_bzWriteOpen, bz2err = %d", bz2err);
208229910Seadler	scan=0;len=0;pos=0;
209148771Scperciva	lastscan=0;lastpos=0;lastoffset=0;
210148771Scperciva	while(scan<newsize) {
211148771Scperciva		oldscore=0;
212148771Scperciva
213148771Scperciva		for(scsc=scan+=len;scan<newsize;scan++) {
214148771Scperciva			len=search(I,old,oldsize,new+scan,newsize-scan,
215148771Scperciva					0,oldsize,&pos);
216148771Scperciva
217148771Scperciva			for(;scsc<scan+len;scsc++)
218148771Scperciva			if((scsc+lastoffset<oldsize) &&
219148771Scperciva				(old[scsc+lastoffset] == new[scsc]))
220148771Scperciva				oldscore++;
221148771Scperciva
222148771Scperciva			if(((len==oldscore) && (len!=0)) ||
223148771Scperciva				(len>oldscore+8)) break;
224148771Scperciva
225148771Scperciva			if((scan+lastoffset<oldsize) &&
226148771Scperciva				(old[scan+lastoffset] == new[scan]))
227148771Scperciva				oldscore--;
228298089Spfg		}
229148771Scperciva
230148771Scperciva		if((len!=oldscore) || (scan==newsize)) {
231148771Scperciva			s=0;Sf=0;lenf=0;
232148771Scperciva			for(i=0;(lastscan+i<scan)&&(lastpos+i<oldsize);) {
233148771Scperciva				if(old[lastpos+i]==new[lastscan+i]) s++;
234148771Scperciva				i++;
235298089Spfg				if(s*2-i>Sf*2-lenf) { Sf=s; lenf=i; }
236298089Spfg			}
237148771Scperciva
238148771Scperciva			lenb=0;
239148771Scperciva			if(scan<newsize) {
240148771Scperciva				s=0;Sb=0;
241148771Scperciva				for(i=1;(scan>=lastscan+i)&&(pos>=i);i++) {
242148771Scperciva					if(old[pos-i]==new[scan-i]) s++;
243298089Spfg					if(s*2-i>Sb*2-lenb) { Sb=s; lenb=i; }
244298089Spfg				}
245298089Spfg			}
246148771Scperciva
247148771Scperciva			if(lastscan+lenf>scan-lenb) {
248148771Scperciva				overlap=(lastscan+lenf)-(scan-lenb);
249148771Scperciva				s=0;Ss=0;lens=0;
250148771Scperciva				for(i=0;i<overlap;i++) {
251148771Scperciva					if(new[lastscan+lenf-overlap+i]==
252148771Scperciva					   old[lastpos+lenf-overlap+i]) s++;
253148771Scperciva					if(new[scan-lenb+i]==
254148771Scperciva					   old[pos-lenb+i]) s--;
255298089Spfg					if(s>Ss) { Ss=s; lens=i+1; }
256298089Spfg				}
257148771Scperciva
258148771Scperciva				lenf+=lens-overlap;
259148771Scperciva				lenb-=lens;
260298089Spfg			}
261148771Scperciva
262148771Scperciva			for(i=0;i<lenf;i++)
263148771Scperciva				db[dblen+i]=new[lastscan+i]-old[lastpos+i];
264148771Scperciva			for(i=0;i<(scan-lenb)-(lastscan+lenf);i++)
265148771Scperciva				eb[eblen+i]=new[lastscan+lenf+i];
266148771Scperciva
267148771Scperciva			dblen+=lenf;
268148771Scperciva			eblen+=(scan-lenb)-(lastscan+lenf);
269148771Scperciva
270148771Scperciva			offtout(lenf,buf);
271148771Scperciva			BZ2_bzWrite(&bz2err, pfbz2, buf, 8);
272148771Scperciva			if (bz2err != BZ_OK)
273148771Scperciva				errx(1, "BZ2_bzWrite, bz2err = %d", bz2err);
274148771Scperciva
275148771Scperciva			offtout((scan-lenb)-(lastscan+lenf),buf);
276148771Scperciva			BZ2_bzWrite(&bz2err, pfbz2, buf, 8);
277148771Scperciva			if (bz2err != BZ_OK)
278148771Scperciva				errx(1, "BZ2_bzWrite, bz2err = %d", bz2err);
279148771Scperciva
280148771Scperciva			offtout((pos-lenb)-(lastpos+lenf),buf);
281148771Scperciva			BZ2_bzWrite(&bz2err, pfbz2, buf, 8);
282148771Scperciva			if (bz2err != BZ_OK)
283148771Scperciva				errx(1, "BZ2_bzWrite, bz2err = %d", bz2err);
284148771Scperciva
285148771Scperciva			lastscan=scan-lenb;
286148771Scperciva			lastpos=pos-lenb;
287148771Scperciva			lastoffset=pos-scan;
288298089Spfg		}
289298089Spfg	}
290148771Scperciva	BZ2_bzWriteClose(&bz2err, pfbz2, 0, NULL, NULL);
291148771Scperciva	if (bz2err != BZ_OK)
292148771Scperciva		errx(1, "BZ2_bzWriteClose, bz2err = %d", bz2err);
293148771Scperciva
294148771Scperciva	/* Compute size of compressed ctrl data */
295148771Scperciva	if ((len = ftello(pf)) == -1)
296148771Scperciva		err(1, "ftello");
297148771Scperciva	offtout(len-32, header + 8);
298148771Scperciva
299148771Scperciva	/* Write compressed diff data */
300148771Scperciva	if ((pfbz2 = BZ2_bzWriteOpen(&bz2err, pf, 9, 0, 0)) == NULL)
301148771Scperciva		errx(1, "BZ2_bzWriteOpen, bz2err = %d", bz2err);
302148771Scperciva	BZ2_bzWrite(&bz2err, pfbz2, db, dblen);
303148771Scperciva	if (bz2err != BZ_OK)
304148771Scperciva		errx(1, "BZ2_bzWrite, bz2err = %d", bz2err);
305148771Scperciva	BZ2_bzWriteClose(&bz2err, pfbz2, 0, NULL, NULL);
306148771Scperciva	if (bz2err != BZ_OK)
307148771Scperciva		errx(1, "BZ2_bzWriteClose, bz2err = %d", bz2err);
308148771Scperciva
309148771Scperciva	/* Compute size of compressed diff data */
310148771Scperciva	if ((newsize = ftello(pf)) == -1)
311148771Scperciva		err(1, "ftello");
312148771Scperciva	offtout(newsize - len, header + 16);
313148771Scperciva
314148771Scperciva	/* Write compressed extra data */
315148771Scperciva	if ((pfbz2 = BZ2_bzWriteOpen(&bz2err, pf, 9, 0, 0)) == NULL)
316148771Scperciva		errx(1, "BZ2_bzWriteOpen, bz2err = %d", bz2err);
317148771Scperciva	BZ2_bzWrite(&bz2err, pfbz2, eb, eblen);
318148771Scperciva	if (bz2err != BZ_OK)
319148771Scperciva		errx(1, "BZ2_bzWrite, bz2err = %d", bz2err);
320148771Scperciva	BZ2_bzWriteClose(&bz2err, pfbz2, 0, NULL, NULL);
321148771Scperciva	if (bz2err != BZ_OK)
322148771Scperciva		errx(1, "BZ2_bzWriteClose, bz2err = %d", bz2err);
323148771Scperciva
324148771Scperciva	/* Seek to the beginning, write the header, and close the file */
325148771Scperciva	if (fseeko(pf, 0, SEEK_SET))
326148771Scperciva		err(1, "fseeko");
327148771Scperciva	if (fwrite(header, 32, 1, pf) != 1)
328148771Scperciva		err(1, "fwrite(%s)", argv[3]);
329148771Scperciva	if (fclose(pf))
330148771Scperciva		err(1, "fclose");
331148771Scperciva
332148771Scperciva	/* Free the memory we used */
333148771Scperciva	free(db);
334148771Scperciva	free(eb);
335148771Scperciva	free(I);
336148771Scperciva	free(old);
337148771Scperciva	free(new);
338148771Scperciva
339148771Scperciva	return 0;
340148771Scperciva}
341