• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /netgear-WNDR4500v2-V1.0.0.60_1.0.38/ap/gpl/timemachine/gettext-0.17/gettext-tools/gnulib-lib/
1/* Analyze differences between two vectors.
2
3   Copyright (C) 1988-1989, 1992-1995, 2001-2004, 2006, 2007 Free
4   Software Foundation, Inc.
5
6   This program is free software: you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 3 of the License, or
9   (at your option) any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
18
19
20/* The basic idea is to consider two vectors as similar if, when
21   transforming the first vector into the second vector through a
22   sequence of edits (inserts and deletes of one element each),
23   this sequence is short - or equivalently, if the ordered list
24   of elements that are untouched by these edits is long.  For a
25   good introduction to the subject, read about the "Levenshtein
26   distance" in Wikipedia.
27
28   The basic algorithm is described in:
29   "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
30   Algorithmica Vol. 1 No. 2, 1986, pp. 251-266;
31   see especially section 4.2, which describes the variation used below.
32
33   The basic algorithm was independently discovered as described in:
34   "Algorithms for Approximate String Matching", E. Ukkonen,
35   Information and Control Vol. 64, 1985, pp. 100-118.
36
37   Unless the 'find_minimal' flag is set, this code uses the TOO_EXPENSIVE
38   heuristic, by Paul Eggert, to limit the cost to O(N**1.5 log N)
39   at the price of producing suboptimal output for large inputs with
40   many differences.  */
41
42/* Before including this file, you need to define:
43     ELEMENT                 The element type of the vectors being compared.
44     EQUAL                   A two-argument macro that tests two elements for
45                             equality.
46     OFFSET                  A signed integer type sufficient to hold the
47                             difference between two indices. Usually
48                             something like ssize_t.
49     EXTRA_CONTEXT_FIELDS    Declarations of fields for 'struct context'.
50     NOTE_DELETE(ctxt, xoff) Record the removal of the object xvec[xoff].
51     NOTE_INSERT(ctxt, yoff) Record the insertion of the object yvec[yoff].
52     USE_HEURISTIC           (Optional) Define if you want to support the
53                             heuristic for large vectors.  */
54
55/* Maximum value of type OFFSET.  */
56#define OFFSET_MAX \
57  ((((OFFSET)1 << (sizeof (OFFSET) * CHAR_BIT - 2)) - 1) * 2 + 1)
58
59/* Use this to suppress gcc's `...may be used before initialized' warnings. */
60#ifndef IF_LINT
61# ifdef lint
62#  define IF_LINT(Code) Code
63# else
64#  define IF_LINT(Code) /* empty */
65# endif
66#endif
67
68/*
69 * Context of comparison operation.
70 */
71struct context
72{
73  /* Vectors being compared.  */
74  const ELEMENT *xvec;
75  const ELEMENT *yvec;
76
77  /* Extra fields.  */
78  EXTRA_CONTEXT_FIELDS
79
80  /* Vector, indexed by diagonal, containing 1 + the X coordinate of the point
81     furthest along the given diagonal in the forward search of the edit
82     matrix.  */
83  OFFSET *fdiag;
84
85  /* Vector, indexed by diagonal, containing the X coordinate of the point
86     furthest along the given diagonal in the backward search of the edit
87     matrix.  */
88  OFFSET *bdiag;
89
90  #ifdef USE_HEURISTIC
91  /* This corresponds to the diff -H flag.  With this heuristic, for
92     vectors with a constant small density of changes, the algorithm is
93     linear in the vectors size.  */
94  bool heuristic;
95  #endif
96
97  /* Edit scripts longer than this are too expensive to compute.  */
98  OFFSET too_expensive;
99
100  /* Snakes bigger than this are considered `big'.  */
101  #define SNAKE_LIMIT 20
102};
103
104struct partition
105{
106  /* Midpoints of this partition.  */
107  OFFSET xmid;
108  OFFSET ymid;
109
110  /* True if low half will be analyzed minimally.  */
111  bool lo_minimal;
112
113  /* Likewise for high half.  */
114  bool hi_minimal;
115};
116
117
118/* Find the midpoint of the shortest edit script for a specified portion
119   of the two vectors.
120
121   Scan from the beginnings of the vectors, and simultaneously from the ends,
122   doing a breadth-first search through the space of edit-sequence.
123   When the two searches meet, we have found the midpoint of the shortest
124   edit sequence.
125
126   If FIND_MINIMAL is true, find the minimal edit script regardless of
127   expense.  Otherwise, if the search is too expensive, use heuristics to
128   stop the search and report a suboptimal answer.
129
130   Set PART->(xmid,ymid) to the midpoint (XMID,YMID).  The diagonal number
131   XMID - YMID equals the number of inserted elements minus the number
132   of deleted elements (counting only elements before the midpoint).
133
134   Set PART->lo_minimal to true iff the minimal edit script for the
135   left half of the partition is known; similarly for PART->hi_minimal.
136
137   This function assumes that the first elements of the specified portions
138   of the two vectors do not match, and likewise that the last elements do not
139   match.  The caller must trim matching elements from the beginning and end
140   of the portions it is going to specify.
141
142   If we return the "wrong" partitions, the worst this can do is cause
143   suboptimal diff output.  It cannot cause incorrect diff output.  */
144
145static void
146diag (OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim, bool find_minimal,
147      struct partition *part, struct context *ctxt)
148{
149  OFFSET *const fd = ctxt->fdiag;	/* Give the compiler a chance. */
150  OFFSET *const bd = ctxt->bdiag;	/* Additional help for the compiler. */
151  const ELEMENT *const xv = ctxt->xvec;	/* Still more help for the compiler. */
152  const ELEMENT *const yv = ctxt->yvec;	/* And more and more . . . */
153  const OFFSET dmin = xoff - ylim;	/* Minimum valid diagonal. */
154  const OFFSET dmax = xlim - yoff;	/* Maximum valid diagonal. */
155  const OFFSET fmid = xoff - yoff;	/* Center diagonal of top-down search. */
156  const OFFSET bmid = xlim - ylim;	/* Center diagonal of bottom-up search. */
157  OFFSET fmin = fmid;
158  OFFSET fmax = fmid;		/* Limits of top-down search. */
159  OFFSET bmin = bmid;
160  OFFSET bmax = bmid;		/* Limits of bottom-up search. */
161  OFFSET c;			/* Cost. */
162  bool odd = (fmid - bmid) & 1;	/* True if southeast corner is on an odd
163				   diagonal with respect to the northwest. */
164
165  fd[fmid] = xoff;
166  bd[bmid] = xlim;
167
168  for (c = 1;; ++c)
169    {
170      OFFSET d;			/* Active diagonal. */
171      bool big_snake = false;
172
173      /* Extend the top-down search by an edit step in each diagonal. */
174      if (fmin > dmin)
175	fd[--fmin - 1] = -1;
176      else
177	++fmin;
178      if (fmax < dmax)
179	fd[++fmax + 1] = -1;
180      else
181	--fmax;
182      for (d = fmax; d >= fmin; d -= 2)
183	{
184	  OFFSET x;
185	  OFFSET y;
186	  OFFSET tlo = fd[d - 1];
187	  OFFSET thi = fd[d + 1];
188	  OFFSET x0 = tlo < thi ? thi : tlo + 1;
189
190	  for (x = x0, y = x0 - d;
191	       x < xlim && y < ylim && EQUAL (xv[x], yv[y]);
192	       x++, y++)
193	    continue;
194	  if (x - x0 > SNAKE_LIMIT)
195	    big_snake = true;
196	  fd[d] = x;
197	  if (odd && bmin <= d && d <= bmax && bd[d] <= x)
198	    {
199	      part->xmid = x;
200	      part->ymid = y;
201	      part->lo_minimal = part->hi_minimal = true;
202	      return;
203	    }
204	}
205
206      /* Similarly extend the bottom-up search.  */
207      if (bmin > dmin)
208	bd[--bmin - 1] = OFFSET_MAX;
209      else
210	++bmin;
211      if (bmax < dmax)
212	bd[++bmax + 1] = OFFSET_MAX;
213      else
214	--bmax;
215      for (d = bmax; d >= bmin; d -= 2)
216	{
217	  OFFSET x;
218	  OFFSET y;
219	  OFFSET tlo = bd[d - 1];
220	  OFFSET thi = bd[d + 1];
221	  OFFSET x0 = tlo < thi ? tlo : thi - 1;
222
223	  for (x = x0, y = x0 - d;
224	       xoff < x && yoff < y && EQUAL (xv[x - 1], yv[y - 1]);
225	       x--, y--)
226	    continue;
227	  if (x0 - x > SNAKE_LIMIT)
228	    big_snake = true;
229	  bd[d] = x;
230	  if (!odd && fmin <= d && d <= fmax && x <= fd[d])
231	    {
232	      part->xmid = x;
233	      part->ymid = y;
234	      part->lo_minimal = part->hi_minimal = true;
235	      return;
236	    }
237	}
238
239      if (find_minimal)
240	continue;
241
242#ifdef USE_HEURISTIC
243      /* Heuristic: check occasionally for a diagonal that has made lots
244	 of progress compared with the edit distance.  If we have any
245	 such, find the one that has made the most progress and return it
246	 as if it had succeeded.
247
248	 With this heuristic, for vectors with a constant small density
249	 of changes, the algorithm is linear in the vector size.  */
250
251      if (200 < c && big_snake && ctxt->heuristic)
252	{
253	  OFFSET best = 0;
254
255	  for (d = fmax; d >= fmin; d -= 2)
256	    {
257	      OFFSET dd = d - fmid;
258	      OFFSET x = fd[d];
259	      OFFSET y = x - d;
260	      OFFSET v = (x - xoff) * 2 - dd;
261
262	      if (v > 12 * (c + (dd < 0 ? -dd : dd)))
263		{
264		  if (v > best
265		      && xoff + SNAKE_LIMIT <= x && x < xlim
266		      && yoff + SNAKE_LIMIT <= y && y < ylim)
267		    {
268		      /* We have a good enough best diagonal; now insist
269			 that it end with a significant snake.  */
270		      int k;
271
272		      for (k = 1; EQUAL (xv[x - k], yv[y - k]); k++)
273			if (k == SNAKE_LIMIT)
274			  {
275			    best = v;
276			    part->xmid = x;
277			    part->ymid = y;
278			    break;
279			  }
280		    }
281		}
282	    }
283	  if (best > 0)
284	    {
285	      part->lo_minimal = true;
286	      part->hi_minimal = false;
287	      return;
288	    }
289
290	  best = 0;
291	  for (d = bmax; d >= bmin; d -= 2)
292	    {
293	      OFFSET dd = d - bmid;
294	      OFFSET x = bd[d];
295	      OFFSET y = x - d;
296	      OFFSET v = (xlim - x) * 2 + dd;
297
298	      if (v > 12 * (c + (dd < 0 ? -dd : dd)))
299		{
300		  if (v > best
301		      && xoff < x && x <= xlim - SNAKE_LIMIT
302		      && yoff < y && y <= ylim - SNAKE_LIMIT)
303		    {
304		      /* We have a good enough best diagonal; now insist
305			 that it end with a significant snake.  */
306		      int k;
307
308		      for (k = 0; EQUAL (xv[x + k], yv[y + k]); k++)
309			if (k == SNAKE_LIMIT - 1)
310			  {
311			    best = v;
312			    part->xmid = x;
313			    part->ymid = y;
314			    break;
315			  }
316		    }
317		}
318	    }
319	  if (best > 0)
320	    {
321	      part->lo_minimal = false;
322	      part->hi_minimal = true;
323	      return;
324	    }
325	}
326#endif /* USE_HEURISTIC */
327
328      /* Heuristic: if we've gone well beyond the call of duty, give up
329	 and report halfway between our best results so far.  */
330      if (c >= ctxt->too_expensive)
331	{
332	  OFFSET fxybest;
333	  OFFSET fxbest IF_LINT (= 0);
334	  OFFSET bxybest;
335	  OFFSET bxbest IF_LINT (= 0);
336
337	  /* Find forward diagonal that maximizes X + Y.  */
338	  fxybest = -1;
339	  for (d = fmax; d >= fmin; d -= 2)
340	    {
341	      OFFSET x = MIN (fd[d], xlim);
342	      OFFSET y = x - d;
343	      if (ylim < y)
344		{
345		  x = ylim + d;
346		  y = ylim;
347		}
348	      if (fxybest < x + y)
349		{
350		  fxybest = x + y;
351		  fxbest = x;
352		}
353	    }
354
355	  /* Find backward diagonal that minimizes X + Y.  */
356	  bxybest = OFFSET_MAX;
357	  for (d = bmax; d >= bmin; d -= 2)
358	    {
359	      OFFSET x = MAX (xoff, bd[d]);
360	      OFFSET y = x - d;
361	      if (y < yoff)
362		{
363		  x = yoff + d;
364		  y = yoff;
365		}
366	      if (x + y < bxybest)
367		{
368		  bxybest = x + y;
369		  bxbest = x;
370		}
371	    }
372
373	  /* Use the better of the two diagonals.  */
374	  if ((xlim + ylim) - bxybest < fxybest - (xoff + yoff))
375	    {
376	      part->xmid = fxbest;
377	      part->ymid = fxybest - fxbest;
378	      part->lo_minimal = true;
379	      part->hi_minimal = false;
380	    }
381	  else
382	    {
383	      part->xmid = bxbest;
384	      part->ymid = bxybest - bxbest;
385	      part->lo_minimal = false;
386	      part->hi_minimal = true;
387	    }
388	  return;
389	}
390    }
391}
392
393
394/* Compare in detail contiguous subsequences of the two vectors
395   which are known, as a whole, to match each other.
396
397   The subsequence of vector 0 is [XOFF, XLIM) and likewise for vector 1.
398
399   Note that XLIM, YLIM are exclusive bounds.  All indices into the vectors
400   are origin-0.
401
402   If FIND_MINIMAL, find a minimal difference no matter how
403   expensive it is.
404
405   The results are recorded by invoking NOTE_DELETE and NOTE_INSERT.  */
406
407static void
408compareseq (OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim,
409	    bool find_minimal, struct context *ctxt)
410{
411  ELEMENT const *xv = ctxt->xvec; /* Help the compiler.  */
412  ELEMENT const *yv = ctxt->yvec;
413
414  /* Slide down the bottom initial diagonal.  */
415  while (xoff < xlim && yoff < ylim && EQUAL (xv[xoff], yv[yoff]))
416    {
417      xoff++;
418      yoff++;
419    }
420
421  /* Slide up the top initial diagonal. */
422  while (xoff < xlim && yoff < ylim && EQUAL (xv[xlim - 1], yv[ylim - 1]))
423    {
424      xlim--;
425      ylim--;
426    }
427
428  /* Handle simple cases. */
429  if (xoff == xlim)
430    while (yoff < ylim)
431      {
432	NOTE_INSERT (ctxt, yoff);
433	yoff++;
434      }
435  else if (yoff == ylim)
436    while (xoff < xlim)
437      {
438	NOTE_DELETE (ctxt, xoff);
439	xoff++;
440      }
441  else
442    {
443      struct partition part;
444
445      /* Find a point of correspondence in the middle of the vectors.  */
446      diag (xoff, xlim, yoff, ylim, find_minimal, &part, ctxt);
447
448      /* Use the partitions to split this problem into subproblems.  */
449      compareseq (xoff, part.xmid, yoff, part.ymid, part.lo_minimal, ctxt);
450      compareseq (part.xmid, xlim, part.ymid, ylim, part.hi_minimal, ctxt);
451    }
452}
453
454#undef ELEMENT
455#undef EQUAL
456#undef OFFSET
457#undef EXTRA_CONTEXT_FIELDS
458#undef NOTE_DELETE
459#undef NOTE_INSERT
460#undef USE_HEURISTIC
461#undef OFFSET_MAX
462