1/* Myers diff algorithm implementation, invented by Eugene W. Myers [1].
2 * Implementations of both the Myers Divide Et Impera (using linear space)
3 * and the canonical Myers algorithm (using quadratic space). */
4/*
5 * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19
20#include <stdbool.h>
21#include <stdint.h>
22#include <stdlib.h>
23#include <string.h>
24#include <stdio.h>
25#include <errno.h>
26
27#include <arraylist.h>
28#include <diff_main.h>
29
30#include "diff_internal.h"
31#include "diff_debug.h"
32
33/* Myers' diff algorithm [1] is nicely explained in [2].
34 * [1] http://www.xmailserver.org/diff2.pdf
35 * [2] https://blog.jcoglan.com/2017/02/12/the-myers-diff-algorithm-part-1/ ff.
36 *
37 * Myers approaches finding the smallest diff as a graph problem.
38 * The crux is that the original algorithm requires quadratic amount of memory:
39 * both sides' lengths added, and that squared. So if we're diffing lines of
40 * text, two files with 1000 lines each would blow up to a matrix of about
41 * 2000 * 2000 ints of state, about 16 Mb of RAM to figure out 2 kb of text.
42 * The solution is using Myers' "divide and conquer" extension algorithm, which
43 * does the original traversal from both ends of the files to reach a middle
44 * where these "snakes" touch, hence does not need to backtrace the traversal,
45 * and so gets away with only keeping a single column of that huge state matrix
46 * in memory.
47 */
48
49struct diff_box {
50	unsigned int left_start;
51	unsigned int left_end;
52	unsigned int right_start;
53	unsigned int right_end;
54};
55
56/* If the two contents of a file are A B C D E and X B C Y,
57 * the Myers diff graph looks like:
58 *
59 *   k0  k1
60 *    \   \
61 * k-1     0 1 2 3 4 5
62 *   \      A B C D E
63 *     0   o-o-o-o-o-o
64 *      X  | | | | | |
65 *     1   o-o-o-o-o-o
66 *      B  | |\| | | |
67 *     2   o-o-o-o-o-o
68 *      C  | | |\| | |
69 *     3   o-o-o-o-o-o
70 *      Y  | | | | | |\
71 *     4   o-o-o-o-o-o c1
72 *                  \ \
73 *                 c-1 c0
74 *
75 * Moving right means delete an atom from the left-hand-side,
76 * Moving down means add an atom from the right-hand-side.
77 * Diagonals indicate identical atoms on both sides, the challenge is to use as
78 * many diagonals as possible.
79 *
80 * The original Myers algorithm walks all the way from the top left to the
81 * bottom right, remembers all steps, and then backtraces to find the shortest
82 * path. However, that requires keeping the entire graph in memory, which needs
83 * quadratic space.
84 *
85 * Myers adds a variant that uses linear space -- note, not linear time, only
86 * linear space: walk forward and backward, find a meeting point in the middle,
87 * and recurse on the two separate sections. This is called "divide and
88 * conquer".
89 *
90 * d: the step number, starting with 0, a.k.a. the distance from the starting
91 *    point.
92 * k: relative index in the state array for the forward scan, indicating on
93 *    which diagonal through the diff graph we currently are.
94 * c: relative index in the state array for the backward scan, indicating the
95 *    diagonal number from the bottom up.
96 *
97 * The "divide and conquer" traversal through the Myers graph looks like this:
98 *
99 *      | d=   0   1   2   3      2   1   0
100 *  ----+--------------------------------------------
101 *  k=  |                                      c=
102 *   4  |                                       3
103 *      |
104 *   3  |                 3,0    5,2            2
105 *      |                /          \
106 *   2  |             2,0            5,3        1
107 *      |            /                 \
108 *   1  |         1,0     4,3 >= 4,3    5,4<--  0
109 *      |        /       /          \  /
110 *   0  |  -->0,0     3,3            4,4       -1
111 *      |        \   /              /
112 *  -1  |         0,1     1,2    3,4           -2
113 *      |            \   /
114 *  -2  |             0,2                      -3
115 *      |                \
116 *      |                 0,3
117 *      |  forward->                 <-backward
118 *
119 * x,y pairs here are the coordinates in the Myers graph:
120 * x = atom index in left-side source, y = atom index in the right-side source.
121 *
122 * Only one forward column and one backward column are kept in mem, each need at
123 * most left.len + 1 + right.len items.  Note that each d step occupies either
124 * the even or the odd items of a column: if e.g. the previous column is in the
125 * odd items, the next column is formed in the even items, without overwriting
126 * the previous column's results.
127 *
128 * Also note that from the diagonal index k and the x coordinate, the y
129 * coordinate can be derived:
130 *    y = x - k
131 * Hence the state array only needs to keep the x coordinate, i.e. the position
132 * in the left-hand file, and the y coordinate, i.e. position in the right-hand
133 * file, is derived from the index in the state array.
134 *
135 * The two traces meet at 4,3, the first step (here found in the forward
136 * traversal) where a forward position is on or past a backward traced position
137 * on the same diagonal.
138 *
139 * This divides the problem space into:
140 *
141 *         0 1 2 3 4 5
142 *          A B C D E
143 *     0   o-o-o-o-o
144 *      X  | | | | |
145 *     1   o-o-o-o-o
146 *      B  | |\| | |
147 *     2   o-o-o-o-o
148 *      C  | | |\| |
149 *     3   o-o-o-o-*-o   *: forward and backward meet here
150 *      Y          | |
151 *     4           o-o
152 *
153 * Doing the same on each section lead to:
154 *
155 *         0 1 2 3 4 5
156 *          A B C D E
157 *     0   o-o
158 *      X  | |
159 *     1   o-b    b: backward d=1 first reaches here (sliding up the snake)
160 *      B     \   f: then forward d=2 reaches here (sliding down the snake)
161 *     2       o     As result, the box from b to f is found to be identical;
162 *      C       \    leaving a top box from 0,0 to 1,1 and a bottom trivial
163 *     3         f-o tail 3,3 to 4,3.
164 *
165 *     3           o-*
166 *      Y            |
167 *     4             o   *: forward and backward meet here
168 *
169 * and solving the last top left box gives:
170 *
171 *         0 1 2 3 4 5
172 *          A B C D E           -A
173 *     0   o-o                  +X
174 *      X    |                   B
175 *     1     o                   C
176 *      B     \                 -D
177 *     2       o                -E
178 *      C       \               +Y
179 *     3         o-o-o
180 *      Y            |
181 *     4             o
182 *
183 */
184
185#define xk_to_y(X, K) ((X) - (K))
186#define xc_to_y(X, C, DELTA) ((X) - (C) + (DELTA))
187#define k_to_c(K, DELTA) ((K) + (DELTA))
188#define c_to_k(C, DELTA) ((C) - (DELTA))
189
190/* Do one forwards step in the "divide and conquer" graph traversal.
191 * left: the left side to diff.
192 * right: the right side to diff against.
193 * kd_forward: the traversal state for forwards traversal, modified by this
194 *             function.
195 *             This is carried over between invocations with increasing d.
196 *             kd_forward points at the center of the state array, allowing
197 *             negative indexes.
198 * kd_backward: the traversal state for backwards traversal, to find a meeting
199 *              point.
200 *              Since forwards is done first, kd_backward will be valid for d -
201 *              1, not d.
202 *              kd_backward points at the center of the state array, allowing
203 *              negative indexes.
204 * d: Step or distance counter, indicating for what value of d the kd_forward
205 *    should be populated.
206 *    For d == 0, kd_forward[0] is initialized, i.e. the first invocation should
207 *    be for d == 0.
208 * meeting_snake: resulting meeting point, if any.
209 * Return true when a meeting point has been identified.
210 */
211static int
212diff_divide_myers_forward(bool *found_midpoint,
213			  struct diff_data *left, struct diff_data *right,
214			  int *kd_forward, int *kd_backward, int d,
215			  struct diff_box *meeting_snake)
216{
217	int delta = (int)right->atoms.len - (int)left->atoms.len;
218	int k;
219	int x;
220	int prev_x;
221	int prev_y;
222	int x_before_slide;
223	*found_midpoint = false;
224
225	for (k = d; k >= -d; k -= 2) {
226		if (k < -(int)right->atoms.len || k > (int)left->atoms.len) {
227			/* This diagonal is completely outside of the Myers
228			 * graph, don't calculate it. */
229			if (k < 0) {
230				/* We are traversing negatively, and already
231				 * below the entire graph, nothing will come of
232				 * this. */
233				debug(" break\n");
234				break;
235			}
236			debug(" continue\n");
237			continue;
238		}
239		if (d == 0) {
240			/* This is the initializing step. There is no prev_k
241			 * yet, get the initial x from the top left of the Myers
242			 * graph. */
243			x = 0;
244			prev_x = x;
245			prev_y = xk_to_y(x, k);
246		}
247		/* Favoring "-" lines first means favoring moving rightwards in
248		 * the Myers graph.
249		 * For this, all k should derive from k - 1, only the bottom
250		 * most k derive from k + 1:
251		 *
252		 *      | d=   0   1   2
253		 *  ----+----------------
254		 *  k=  |
255		 *   2  |             2,0 <-- from prev_k = 2 - 1 = 1
256		 *      |            /
257		 *   1  |         1,0
258		 *      |        /
259		 *   0  |  -->0,0     3,3
260		 *      |       \\   /
261		 *  -1  |         0,1 <-- bottom most for d=1 from
262		 *      |           \\    prev_k = -1 + 1 = 0
263		 *  -2  |             0,2 <-- bottom most for d=2 from
264		 *                            prev_k = -2 + 1 = -1
265		 *
266		 * Except when a k + 1 from a previous run already means a
267		 * further advancement in the graph.
268		 * If k == d, there is no k + 1 and k - 1 is the only option.
269		 * If k < d, use k + 1 in case that yields a larger x. Also use
270		 * k + 1 if k - 1 is outside the graph.
271		 */
272		else if (k > -d
273			 && (k == d
274			     || (k - 1 >= -(int)right->atoms.len
275				 && kd_forward[k - 1] >= kd_forward[k + 1]))) {
276			/* Advance from k - 1.
277			 * From position prev_k, step to the right in the Myers
278			 * graph: x += 1.
279			 */
280			int prev_k = k - 1;
281			prev_x = kd_forward[prev_k];
282			prev_y = xk_to_y(prev_x, prev_k);
283			x = prev_x + 1;
284		} else {
285			/* The bottom most one.
286			 * From position prev_k, step to the bottom in the Myers
287			 * graph: y += 1.
288			 * Incrementing y is achieved by decrementing k while
289			 * keeping the same x.
290			 * (since we're deriving y from y = x - k).
291			 */
292			int prev_k = k + 1;
293			prev_x = kd_forward[prev_k];
294			prev_y = xk_to_y(prev_x, prev_k);
295			x = prev_x;
296		}
297
298		x_before_slide = x;
299		/* Slide down any snake that we might find here. */
300		while (x < left->atoms.len && xk_to_y(x, k) < right->atoms.len) {
301			bool same;
302			int r = diff_atom_same(&same,
303					       &left->atoms.head[x],
304					       &right->atoms.head[
305						xk_to_y(x, k)]);
306			if (r)
307				return r;
308			if (!same)
309				break;
310			x++;
311		}
312		kd_forward[k] = x;
313#if 0
314		if (x_before_slide != x) {
315			debug("  down %d similar lines\n", x - x_before_slide);
316		}
317
318#if DEBUG
319		{
320			int fi;
321			for (fi = d; fi >= k; fi--) {
322				debug("kd_forward[%d] = (%d, %d)\n", fi,
323				      kd_forward[fi], kd_forward[fi] - fi);
324			}
325		}
326#endif
327#endif
328
329		if (x < 0 || x > left->atoms.len
330		    || xk_to_y(x, k) < 0 || xk_to_y(x, k) > right->atoms.len)
331			continue;
332
333		/* Figured out a new forwards traversal, see if this has gone
334		 * onto or even past a preceding backwards traversal.
335		 *
336		 * If the delta in length is odd, then d and backwards_d hit the
337		 * same state indexes:
338		 *      | d=   0   1   2      1   0
339		 *  ----+----------------    ----------------
340		 *  k=  |                              c=
341		 *   4  |                               3
342		 *      |
343		 *   3  |                               2
344		 *      |                same
345		 *   2  |             2,0====5,3        1
346		 *      |            /          \
347		 *   1  |         1,0            5,4<-- 0
348		 *      |        /              /
349		 *   0  |  -->0,0     3,3====4,4       -1
350		 *      |        \   /
351		 *  -1  |         0,1                  -2
352		 *      |            \
353		 *  -2  |             0,2              -3
354		 *      |
355		 *
356		 * If the delta is even, they end up off-by-one, i.e. on
357		 * different diagonals:
358		 *
359		 *      | d=   0   1   2    1   0
360		 *  ----+----------------  ----------------
361		 *      |                            c=
362		 *   3  |                             3
363		 *      |
364		 *   2  |             2,0 off         2
365		 *      |            /   \\
366		 *   1  |         1,0      4,3        1
367		 *      |        /       //   \
368		 *   0  |  -->0,0     3,3      4,4<-- 0
369		 *      |        \   /        /
370		 *  -1  |         0,1      3,4       -1
371		 *      |            \   //
372		 *  -2  |             0,2            -2
373		 *      |
374		 *
375		 * So in the forward path, we can only match up diagonals when
376		 * the delta is odd.
377		 */
378		if ((delta & 1) == 0)
379			continue;
380		 /* Forwards is done first, so the backwards one was still at
381		  * d - 1. Can't do this for d == 0. */
382		int backwards_d = d - 1;
383		if (backwards_d < 0)
384			continue;
385
386		/* If both sides have the same length, forward and backward
387		 * start on the same diagonal, meaning the backwards state index
388		 * c == k.
389		 * As soon as the lengths are not the same, the backwards
390		 * traversal starts on a different diagonal, and c = k shifted
391		 * by the difference in length.
392		 */
393		int c = k_to_c(k, delta);
394
395		/* When the file sizes are very different, the traversal trees
396		 * start on far distant diagonals.
397		 * They don't necessarily meet straight on. See whether this
398		 * forward value is on a diagonal that is also valid in
399		 * kd_backward[], and match them if so. */
400		if (c >= -backwards_d && c <= backwards_d) {
401			/* Current k is on a diagonal that exists in
402			 * kd_backward[]. If the two x positions have met or
403			 * passed (forward walked onto or past backward), then
404			 * we've found a midpoint / a mid-box.
405			 *
406			 * When forwards and backwards traversals meet, the
407			 * endpoints of the mid-snake are not the two points in
408			 * kd_forward and kd_backward, but rather the section
409			 * that was slid (if any) of the current
410			 * forward/backward traversal only.
411			 *
412			 * For example:
413			 *
414			 *   o
415			 *    \
416			 *     o
417			 *      \
418			 *       o
419			 *        \
420			 *         o
421			 *          \
422			 *       X o o
423			 *       | | |
424			 *     o-o-o o
425			 *          \|
426			 *           M
427			 *            \
428			 *             o
429			 *              \
430			 *               A o
431			 *               | |
432			 *             o-o-o
433			 *
434			 * The forward traversal reached M from the top and slid
435			 * downwards to A.  The backward traversal already
436			 * reached X, which is not a straight line from M
437			 * anymore, so picking a mid-snake from M to X would
438			 * yield a mistake.
439			 *
440			 * The correct mid-snake is between M and A. M is where
441			 * the forward traversal hit the diagonal that the
442			 * backward traversal has already passed, and A is what
443			 * it reaches when sliding down identical lines.
444			 */
445			int backward_x = kd_backward[c];
446			if (x >= backward_x) {
447				if (x_before_slide != x) {
448					/* met after sliding up a mid-snake */
449					*meeting_snake = (struct diff_box){
450						.left_start = x_before_slide,
451						.left_end = x,
452						.right_start = xc_to_y(x_before_slide,
453								       c, delta),
454						.right_end = xk_to_y(x, k),
455					};
456				} else {
457					/* met after a side step, non-identical
458					 * line. Mark that as box divider
459					 * instead. This makes sure that
460					 * myers_divide never returns the same
461					 * box that came as input, avoiding
462					 * "infinite" looping. */
463					*meeting_snake = (struct diff_box){
464						.left_start = prev_x,
465						.left_end = x,
466						.right_start = prev_y,
467						.right_end = xk_to_y(x, k),
468					};
469				}
470				debug("HIT x=(%u,%u) - y=(%u,%u)\n",
471				      meeting_snake->left_start,
472				      meeting_snake->right_start,
473				      meeting_snake->left_end,
474				      meeting_snake->right_end);
475				debug_dump_myers_graph(left, right, NULL,
476						       kd_forward, d,
477						       kd_backward, d-1);
478				*found_midpoint = true;
479				return 0;
480			}
481		}
482	}
483
484	return 0;
485}
486
487/* Do one backwards step in the "divide and conquer" graph traversal.
488 * left: the left side to diff.
489 * right: the right side to diff against.
490 * kd_forward: the traversal state for forwards traversal, to find a meeting
491 *             point.
492 *             Since forwards is done first, after this, both kd_forward and
493 *             kd_backward will be valid for d.
494 *             kd_forward points at the center of the state array, allowing
495 *             negative indexes.
496 * kd_backward: the traversal state for backwards traversal, to find a meeting
497 *              point.
498 *              This is carried over between invocations with increasing d.
499 *              kd_backward points at the center of the state array, allowing
500 *              negative indexes.
501 * d: Step or distance counter, indicating for what value of d the kd_backward
502 *    should be populated.
503 *    Before the first invocation, kd_backward[0] shall point at the bottom
504 *    right of the Myers graph (left.len, right.len).
505 *    The first invocation will be for d == 1.
506 * meeting_snake: resulting meeting point, if any.
507 * Return true when a meeting point has been identified.
508 */
509static int
510diff_divide_myers_backward(bool *found_midpoint,
511			   struct diff_data *left, struct diff_data *right,
512			   int *kd_forward, int *kd_backward, int d,
513			   struct diff_box *meeting_snake)
514{
515	int delta = (int)right->atoms.len - (int)left->atoms.len;
516	int c;
517	int x;
518	int prev_x;
519	int prev_y;
520	int x_before_slide;
521
522	*found_midpoint = false;
523
524	for (c = d; c >= -d; c -= 2) {
525		if (c < -(int)left->atoms.len || c > (int)right->atoms.len) {
526			/* This diagonal is completely outside of the Myers
527			 * graph, don't calculate it. */
528			if (c < 0) {
529				/* We are traversing negatively, and already
530				 * below the entire graph, nothing will come of
531				 * this. */
532				break;
533			}
534			continue;
535		}
536		if (d == 0) {
537			/* This is the initializing step. There is no prev_c
538			 * yet, get the initial x from the bottom right of the
539			 * Myers graph. */
540			x = left->atoms.len;
541			prev_x = x;
542			prev_y = xc_to_y(x, c, delta);
543		}
544		/* Favoring "-" lines first means favoring moving rightwards in
545		 * the Myers graph.
546		 * For this, all c should derive from c - 1, only the bottom
547		 * most c derive from c + 1:
548		 *
549		 *                                  2   1   0
550		 *  ---------------------------------------------------
551		 *                                               c=
552		 *                                                3
553		 *
554		 *         from prev_c = c - 1 --> 5,2            2
555		 *                                    \
556		 *                                     5,3        1
557		 *                                        \
558		 *                                 4,3     5,4<-- 0
559		 *                                    \   /
560		 *  bottom most for d=1 from c + 1 --> 4,4       -1
561		 *                                    /
562		 *         bottom most for d=2 --> 3,4           -2
563		 *
564		 * Except when a c + 1 from a previous run already means a
565		 * further advancement in the graph.
566		 * If c == d, there is no c + 1 and c - 1 is the only option.
567		 * If c < d, use c + 1 in case that yields a larger x.
568		 * Also use c + 1 if c - 1 is outside the graph.
569		 */
570		else if (c > -d && (c == d
571				    || (c - 1 >= -(int)right->atoms.len
572					&& kd_backward[c - 1] <= kd_backward[c + 1]))) {
573			/* A top one.
574			 * From position prev_c, step upwards in the Myers
575			 * graph: y -= 1.
576			 * Decrementing y is achieved by incrementing c while
577			 * keeping the same x. (since we're deriving y from
578			 * y = x - c + delta).
579			 */
580			int prev_c = c - 1;
581			prev_x = kd_backward[prev_c];
582			prev_y = xc_to_y(prev_x, prev_c, delta);
583			x = prev_x;
584		} else {
585			/* The bottom most one.
586			 * From position prev_c, step to the left in the Myers
587			 * graph: x -= 1.
588			 */
589			int prev_c = c + 1;
590			prev_x = kd_backward[prev_c];
591			prev_y = xc_to_y(prev_x, prev_c, delta);
592			x = prev_x - 1;
593		}
594
595		/* Slide up any snake that we might find here (sections of
596		 * identical lines on both sides). */
597#if 0
598		debug("c=%d x-1=%d Yb-1=%d-1=%d\n", c, x-1, xc_to_y(x, c,
599								    delta),
600		      xc_to_y(x, c, delta)-1);
601		if (x > 0) {
602			debug("  l=");
603			debug_dump_atom(left, right, &left->atoms.head[x-1]);
604		}
605		if (xc_to_y(x, c, delta) > 0) {
606			debug("  r=");
607			debug_dump_atom(right, left,
608				&right->atoms.head[xc_to_y(x, c, delta)-1]);
609		}
610#endif
611		x_before_slide = x;
612		while (x > 0 && xc_to_y(x, c, delta) > 0) {
613			bool same;
614			int r = diff_atom_same(&same,
615					       &left->atoms.head[x-1],
616					       &right->atoms.head[
617						xc_to_y(x, c, delta)-1]);
618			if (r)
619				return r;
620			if (!same)
621				break;
622			x--;
623		}
624		kd_backward[c] = x;
625#if 0
626		if (x_before_slide != x) {
627			debug("  up %d similar lines\n", x_before_slide - x);
628		}
629
630		if (DEBUG) {
631			int fi;
632			for (fi = d; fi >= c; fi--) {
633				debug("kd_backward[%d] = (%d, %d)\n",
634				      fi,
635				      kd_backward[fi],
636				      kd_backward[fi] - fi + delta);
637			}
638		}
639#endif
640
641		if (x < 0 || x > left->atoms.len
642		    || xc_to_y(x, c, delta) < 0
643		    || xc_to_y(x, c, delta) > right->atoms.len)
644			continue;
645
646		/* Figured out a new backwards traversal, see if this has gone
647		 * onto or even past a preceding forwards traversal.
648		 *
649		 * If the delta in length is even, then d and backwards_d hit
650		 * the same state indexes -- note how this is different from in
651		 * the forwards traversal, because now both d are the same:
652		 *
653		 *      | d=   0   1   2      2   1   0
654		 *  ----+----------------    --------------------
655		 *  k=  |                                  c=
656		 *   4  |
657		 *      |
658		 *   3  |                                   3
659		 *      |                same
660		 *   2  |             2,0====5,2            2
661		 *      |            /          \
662		 *   1  |         1,0            5,3        1
663		 *      |        /              /  \
664		 *   0  |  -->0,0     3,3====4,3    5,4<--  0
665		 *      |        \   /             /
666		 *  -1  |         0,1            4,4       -1
667		 *      |            \
668		 *  -2  |             0,2                  -2
669		 *      |
670		 *                                      -3
671		 * If the delta is odd, they end up off-by-one, i.e. on
672		 * different diagonals.
673		 * So in the backward path, we can only match up diagonals when
674		 * the delta is even.
675		 */
676		if ((delta & 1) != 0)
677			continue;
678		/* Forwards was done first, now both d are the same. */
679		int forwards_d = d;
680
681		/* As soon as the lengths are not the same, the
682		 * backwards traversal starts on a different diagonal,
683		 * and c = k shifted by the difference in length.
684		 */
685		int k = c_to_k(c, delta);
686
687		/* When the file sizes are very different, the traversal trees
688		 * start on far distant diagonals.
689		 * They don't necessarily meet straight on. See whether this
690		 * backward value is also on a valid diagonal in kd_forward[],
691		 * and match them if so. */
692		if (k >= -forwards_d && k <= forwards_d) {
693			/* Current c is on a diagonal that exists in
694			 * kd_forward[]. If the two x positions have met or
695			 * passed (backward walked onto or past forward), then
696			 * we've found a midpoint / a mid-box.
697			 *
698			 * When forwards and backwards traversals meet, the
699			 * endpoints of the mid-snake are not the two points in
700			 * kd_forward and kd_backward, but rather the section
701			 * that was slid (if any) of the current
702			 * forward/backward traversal only.
703			 *
704			 * For example:
705			 *
706			 *   o-o-o
707			 *   | |
708			 *   o A
709			 *   |  \
710			 *   o   o
711			 *        \
712			 *         M
713			 *         |\
714			 *         o o-o-o
715			 *         | | |
716			 *         o o X
717			 *          \
718			 *           o
719			 *            \
720			 *             o
721			 *              \
722			 *               o
723			 *
724			 * The backward traversal reached M from the bottom and
725			 * slid upwards.  The forward traversal already reached
726			 * X, which is not a straight line from M anymore, so
727			 * picking a mid-snake from M to X would yield a
728			 * mistake.
729			 *
730			 * The correct mid-snake is between M and A. M is where
731			 * the backward traversal hit the diagonal that the
732			 * forwards traversal has already passed, and A is what
733			 * it reaches when sliding up identical lines.
734			 */
735
736			int forward_x = kd_forward[k];
737			if (forward_x >= x) {
738				if (x_before_slide != x) {
739					/* met after sliding down a mid-snake */
740					*meeting_snake = (struct diff_box){
741						.left_start = x,
742						.left_end = x_before_slide,
743						.right_start = xc_to_y(x, c, delta),
744						.right_end = xk_to_y(x_before_slide, k),
745					};
746				} else {
747					/* met after a side step, non-identical
748					 * line. Mark that as box divider
749					 * instead. This makes sure that
750					 * myers_divide never returns the same
751					 * box that came as input, avoiding
752					 * "infinite" looping. */
753					*meeting_snake = (struct diff_box){
754						.left_start = x,
755						.left_end = prev_x,
756						.right_start = xc_to_y(x, c, delta),
757						.right_end = prev_y,
758					};
759				}
760				debug("HIT x=%u,%u - y=%u,%u\n",
761				      meeting_snake->left_start,
762				      meeting_snake->right_start,
763				      meeting_snake->left_end,
764				      meeting_snake->right_end);
765				debug_dump_myers_graph(left, right, NULL,
766						       kd_forward, d,
767						       kd_backward, d);
768				*found_midpoint = true;
769				return 0;
770			}
771		}
772	}
773	return 0;
774}
775
776/* Integer square root approximation */
777static int
778shift_sqrt(int val)
779{
780	int i;
781        for (i = 1; val > 0; val >>= 2)
782		i <<= 1;
783        return i;
784}
785
786#define DIFF_EFFORT_MIN 1024
787
788/* Myers "Divide et Impera": tracing forwards from the start and backwards from
789 * the end to find a midpoint that divides the problem into smaller chunks.
790 * Requires only linear amounts of memory. */
791int
792diff_algo_myers_divide(const struct diff_algo_config *algo_config,
793		       struct diff_state *state)
794{
795	int rc = ENOMEM;
796	struct diff_data *left = &state->left;
797	struct diff_data *right = &state->right;
798	int *kd_buf;
799
800	debug("\n** %s\n", __func__);
801	debug("left:\n");
802	debug_dump(left);
803	debug("right:\n");
804	debug_dump(right);
805
806	/* Allocate two columns of a Myers graph, one for the forward and one
807	 * for the backward traversal. */
808	unsigned int max = left->atoms.len + right->atoms.len;
809	size_t kd_len = max + 1;
810	size_t kd_buf_size = kd_len << 1;
811
812	if (state->kd_buf_size < kd_buf_size) {
813		kd_buf = reallocarray(state->kd_buf, kd_buf_size,
814		    sizeof(int));
815		if (!kd_buf)
816			return ENOMEM;
817		state->kd_buf = kd_buf;
818		state->kd_buf_size = kd_buf_size;
819	} else
820		kd_buf = state->kd_buf;
821	int i;
822	for (i = 0; i < kd_buf_size; i++)
823		kd_buf[i] = -1;
824	int *kd_forward = kd_buf;
825	int *kd_backward = kd_buf + kd_len;
826	int max_effort = shift_sqrt(max/2);
827
828	if (max_effort < DIFF_EFFORT_MIN)
829		max_effort = DIFF_EFFORT_MIN;
830
831	/* The 'k' axis in Myers spans positive and negative indexes, so point
832	 * the kd to the middle.
833	 * It is then possible to index from -max/2 .. max/2. */
834	kd_forward += max/2;
835	kd_backward += max/2;
836
837	int d;
838	struct diff_box mid_snake = {};
839	bool found_midpoint = false;
840	for (d = 0; d <= (max/2); d++) {
841		int r;
842		r = diff_divide_myers_forward(&found_midpoint, left, right,
843					      kd_forward, kd_backward, d,
844					      &mid_snake);
845		if (r)
846			return r;
847		if (found_midpoint)
848			break;
849		r = diff_divide_myers_backward(&found_midpoint, left, right,
850					       kd_forward, kd_backward, d,
851					       &mid_snake);
852		if (r)
853			return r;
854		if (found_midpoint)
855			break;
856
857		/* Limit the effort spent looking for a mid snake. If files have
858		 * very few lines in common, the effort spent to find nice mid
859		 * snakes is just not worth it, the diff result will still be
860		 * essentially minus everything on the left, plus everything on
861		 * the right, with a few useless matches here and there. */
862		if (d > max_effort) {
863			/* pick the furthest reaching point from
864			 * kd_forward and kd_backward, and use that as a
865			 * midpoint, to not step into another diff algo
866			 * recursion with unchanged box. */
867			int delta = (int)right->atoms.len - (int)left->atoms.len;
868			int x = 0;
869			int y;
870			int i;
871			int best_forward_i = 0;
872			int best_forward_distance = 0;
873			int best_backward_i = 0;
874			int best_backward_distance = 0;
875			int distance;
876			int best_forward_x;
877			int best_forward_y;
878			int best_backward_x;
879			int best_backward_y;
880
881			debug("~~~ HIT d = %d > max_effort = %d\n", d, max_effort);
882			debug_dump_myers_graph(left, right, NULL,
883					       kd_forward, d,
884					       kd_backward, d);
885
886			for (i = d; i >= -d; i -= 2) {
887				if (i >= -(int)right->atoms.len && i <= (int)left->atoms.len) {
888					x = kd_forward[i];
889					y = xk_to_y(x, i);
890					distance = x + y;
891					if (distance > best_forward_distance) {
892						best_forward_distance = distance;
893						best_forward_i = i;
894					}
895				}
896
897				if (i >= -(int)left->atoms.len && i <= (int)right->atoms.len) {
898					x = kd_backward[i];
899					y = xc_to_y(x, i, delta);
900					distance = (right->atoms.len - x)
901						+ (left->atoms.len - y);
902					if (distance >= best_backward_distance) {
903						best_backward_distance = distance;
904						best_backward_i = i;
905					}
906				}
907			}
908
909			/* The myers-divide didn't meet in the middle. We just
910			 * figured out the places where the forward path
911			 * advanced the most, and the backward path advanced the
912			 * most. Just divide at whichever one of those two is better.
913			 *
914			 *   o-o
915			 *     |
916			 *     o
917			 *      \
918			 *       o
919			 *        \
920			 *         F <-- cut here
921			 *
922			 *
923			 *
924			 *           or here --> B
925			 *                        \
926			 *                         o
927			 *                          \
928			 *                           o
929			 *                           |
930			 *                           o-o
931			 */
932			best_forward_x = kd_forward[best_forward_i];
933			best_forward_y = xk_to_y(best_forward_x, best_forward_i);
934			best_backward_x = kd_backward[best_backward_i];
935			best_backward_y = xc_to_y(best_backward_x, best_backward_i, delta);
936
937			if (best_forward_distance >= best_backward_distance) {
938				x = best_forward_x;
939				y = best_forward_y;
940			} else {
941				x = best_backward_x;
942				y = best_backward_y;
943			}
944
945			debug("max_effort cut at x=%d y=%d\n", x, y);
946			if (x < 0 || y < 0
947			    || x > left->atoms.len || y > right->atoms.len)
948				break;
949
950			found_midpoint = true;
951			mid_snake = (struct diff_box){
952				.left_start = x,
953				.left_end = x,
954				.right_start = y,
955				.right_end = y,
956			};
957			break;
958		}
959	}
960
961	if (!found_midpoint) {
962		/* Divide and conquer failed to find a meeting point. Use the
963		 * fallback_algo defined in the algo_config (leave this to the
964		 * caller). This is just paranoia/sanity, we normally should
965		 * always find a midpoint.
966		 */
967		debug(" no midpoint \n");
968		rc = DIFF_RC_USE_DIFF_ALGO_FALLBACK;
969		goto return_rc;
970	} else {
971		debug(" mid snake L: %u to %u of %u   R: %u to %u of %u\n",
972		      mid_snake.left_start, mid_snake.left_end, left->atoms.len,
973		      mid_snake.right_start, mid_snake.right_end,
974		      right->atoms.len);
975
976		/* Section before the mid-snake.  */
977		debug("Section before the mid-snake\n");
978
979		struct diff_atom *left_atom = &left->atoms.head[0];
980		unsigned int left_section_len = mid_snake.left_start;
981		struct diff_atom *right_atom = &right->atoms.head[0];
982		unsigned int right_section_len = mid_snake.right_start;
983
984		if (left_section_len && right_section_len) {
985			/* Record an unsolved chunk, the caller will apply
986			 * inner_algo() on this chunk. */
987			if (!diff_state_add_chunk(state, false,
988						  left_atom, left_section_len,
989						  right_atom,
990						  right_section_len))
991				goto return_rc;
992		} else if (left_section_len && !right_section_len) {
993			/* Only left atoms and none on the right, they form a
994			 * "minus" chunk, then. */
995			if (!diff_state_add_chunk(state, true,
996						  left_atom, left_section_len,
997						  right_atom, 0))
998				goto return_rc;
999		} else if (!left_section_len && right_section_len) {
1000			/* No left atoms, only atoms on the right, they form a
1001			 * "plus" chunk, then. */
1002			if (!diff_state_add_chunk(state, true,
1003						  left_atom, 0,
1004						  right_atom,
1005						  right_section_len))
1006				goto return_rc;
1007		}
1008		/* else: left_section_len == 0 and right_section_len == 0, i.e.
1009		 * nothing before the mid-snake. */
1010
1011		if (mid_snake.left_end > mid_snake.left_start
1012		    || mid_snake.right_end > mid_snake.right_start) {
1013			/* The midpoint is a section of identical data on both
1014			 * sides, or a certain differing line: that section
1015			 * immediately becomes a solved chunk. */
1016			debug("the mid-snake\n");
1017			if (!diff_state_add_chunk(state, true,
1018				  &left->atoms.head[mid_snake.left_start],
1019				  mid_snake.left_end - mid_snake.left_start,
1020				  &right->atoms.head[mid_snake.right_start],
1021				  mid_snake.right_end - mid_snake.right_start))
1022				goto return_rc;
1023		}
1024
1025		/* Section after the mid-snake. */
1026		debug("Section after the mid-snake\n");
1027		debug("  left_end %u  right_end %u\n",
1028		      mid_snake.left_end, mid_snake.right_end);
1029		debug("  left_count %u  right_count %u\n",
1030		      left->atoms.len, right->atoms.len);
1031		left_atom = &left->atoms.head[mid_snake.left_end];
1032		left_section_len = left->atoms.len - mid_snake.left_end;
1033		right_atom = &right->atoms.head[mid_snake.right_end];
1034		right_section_len = right->atoms.len - mid_snake.right_end;
1035
1036		if (left_section_len && right_section_len) {
1037			/* Record an unsolved chunk, the caller will apply
1038			 * inner_algo() on this chunk. */
1039			if (!diff_state_add_chunk(state, false,
1040						  left_atom, left_section_len,
1041						  right_atom,
1042						  right_section_len))
1043				goto return_rc;
1044		} else if (left_section_len && !right_section_len) {
1045			/* Only left atoms and none on the right, they form a
1046			 * "minus" chunk, then. */
1047			if (!diff_state_add_chunk(state, true,
1048						  left_atom, left_section_len,
1049						  right_atom, 0))
1050				goto return_rc;
1051		} else if (!left_section_len && right_section_len) {
1052			/* No left atoms, only atoms on the right, they form a
1053			 * "plus" chunk, then. */
1054			if (!diff_state_add_chunk(state, true,
1055						  left_atom, 0,
1056						  right_atom,
1057						  right_section_len))
1058				goto return_rc;
1059		}
1060		/* else: left_section_len == 0 and right_section_len == 0, i.e.
1061		 * nothing after the mid-snake. */
1062	}
1063
1064	rc = DIFF_RC_OK;
1065
1066return_rc:
1067	debug("** END %s\n", __func__);
1068	return rc;
1069}
1070
1071/* Myers Diff tracing from the start all the way through to the end, requiring
1072 * quadratic amounts of memory. This can fail if the required space surpasses
1073 * algo_config->permitted_state_size. */
1074int
1075diff_algo_myers(const struct diff_algo_config *algo_config,
1076		struct diff_state *state)
1077{
1078	/* do a diff_divide_myers_forward() without a _backward(), so that it
1079	 * walks forward across the entire files to reach the end. Keep each
1080	 * run's state, and do a final backtrace. */
1081	int rc = ENOMEM;
1082	struct diff_data *left = &state->left;
1083	struct diff_data *right = &state->right;
1084	int *kd_buf;
1085
1086	debug("\n** %s\n", __func__);
1087	debug("left:\n");
1088	debug_dump(left);
1089	debug("right:\n");
1090	debug_dump(right);
1091	debug_dump_myers_graph(left, right, NULL, NULL, 0, NULL, 0);
1092
1093	/* Allocate two columns of a Myers graph, one for the forward and one
1094	 * for the backward traversal. */
1095	unsigned int max = left->atoms.len + right->atoms.len;
1096	size_t kd_len = max + 1 + max;
1097	size_t kd_buf_size = kd_len * kd_len;
1098	size_t kd_state_size = kd_buf_size * sizeof(int);
1099	debug("state size: %zu\n", kd_state_size);
1100	if (kd_buf_size < kd_len /* overflow? */
1101	    || (SIZE_MAX / kd_len ) < kd_len
1102	    || kd_state_size > algo_config->permitted_state_size) {
1103		debug("state size %zu > permitted_state_size %zu, use fallback_algo\n",
1104		      kd_state_size, algo_config->permitted_state_size);
1105		return DIFF_RC_USE_DIFF_ALGO_FALLBACK;
1106	}
1107
1108	if (state->kd_buf_size < kd_buf_size) {
1109		kd_buf = reallocarray(state->kd_buf, kd_buf_size,
1110		    sizeof(int));
1111		if (!kd_buf)
1112			return ENOMEM;
1113		state->kd_buf = kd_buf;
1114		state->kd_buf_size = kd_buf_size;
1115	} else
1116		kd_buf = state->kd_buf;
1117
1118	int i;
1119	for (i = 0; i < kd_buf_size; i++)
1120		kd_buf[i] = -1;
1121
1122	/* The 'k' axis in Myers spans positive and negative indexes, so point
1123	 * the kd to the middle.
1124	 * It is then possible to index from -max .. max. */
1125	int *kd_origin = kd_buf + max;
1126	int *kd_column = kd_origin;
1127
1128	int d;
1129	int backtrack_d = -1;
1130	int backtrack_k = 0;
1131	int k;
1132	int x, y;
1133	for (d = 0; d <= max; d++, kd_column += kd_len) {
1134		debug("-- %s d=%d\n", __func__, d);
1135
1136		for (k = d; k >= -d; k -= 2) {
1137			if (k < -(int)right->atoms.len
1138			    || k > (int)left->atoms.len) {
1139				/* This diagonal is completely outside of the
1140				 * Myers graph, don't calculate it. */
1141				if (k < -(int)right->atoms.len)
1142					debug(" %d k <"
1143					      " -(int)right->atoms.len %d\n",
1144					      k, -(int)right->atoms.len);
1145				else
1146					debug(" %d k > left->atoms.len %d\n", k,
1147					      left->atoms.len);
1148				if (k < 0) {
1149					/* We are traversing negatively, and
1150					 * already below the entire graph,
1151					 * nothing will come of this. */
1152					debug(" break\n");
1153					break;
1154				}
1155				debug(" continue\n");
1156				continue;
1157			}
1158
1159			if (d == 0) {
1160				/* This is the initializing step. There is no
1161				 * prev_k yet, get the initial x from the top
1162				 * left of the Myers graph. */
1163				x = 0;
1164			} else {
1165				int *kd_prev_column = kd_column - kd_len;
1166
1167				/* Favoring "-" lines first means favoring
1168				 * moving rightwards in the Myers graph.
1169				 * For this, all k should derive from k - 1,
1170				 * only the bottom most k derive from k + 1:
1171				 *
1172				 *      | d=   0   1   2
1173				 *  ----+----------------
1174				 *  k=  |
1175				 *   2  |             2,0 <-- from
1176				 *      |            /        prev_k = 2 - 1 = 1
1177				 *   1  |         1,0
1178				 *      |        /
1179				 *   0  |  -->0,0     3,3
1180				 *      |       \\   /
1181				 *  -1  |         0,1 <-- bottom most for d=1
1182				 *      |           \\    from prev_k = -1+1 = 0
1183				 *  -2  |             0,2 <-- bottom most for
1184				 *                            d=2 from
1185				 *                            prev_k = -2+1 = -1
1186				 *
1187				 * Except when a k + 1 from a previous run
1188				 * already means a further advancement in the
1189				 * graph.
1190				 * If k == d, there is no k + 1 and k - 1 is the
1191				 * only option.
1192				 * If k < d, use k + 1 in case that yields a
1193				 * larger x. Also use k + 1 if k - 1 is outside
1194				 * the graph.
1195				 */
1196				if (k > -d
1197				    && (k == d
1198					|| (k - 1 >= -(int)right->atoms.len
1199					    && kd_prev_column[k - 1]
1200					       >= kd_prev_column[k + 1]))) {
1201					/* Advance from k - 1.
1202					 * From position prev_k, step to the
1203					 * right in the Myers graph: x += 1.
1204					 */
1205					int prev_k = k - 1;
1206					int prev_x = kd_prev_column[prev_k];
1207					x = prev_x + 1;
1208				} else {
1209					/* The bottom most one.
1210					 * From position prev_k, step to the
1211					 * bottom in the Myers graph: y += 1.
1212					 * Incrementing y is achieved by
1213					 * decrementing k while keeping the same
1214					 * x. (since we're deriving y from y =
1215					 * x - k).
1216					 */
1217					int prev_k = k + 1;
1218					int prev_x = kd_prev_column[prev_k];
1219					x = prev_x;
1220				}
1221			}
1222
1223			/* Slide down any snake that we might find here. */
1224			while (x < left->atoms.len
1225			       && xk_to_y(x, k) < right->atoms.len) {
1226				bool same;
1227				int r = diff_atom_same(&same,
1228						       &left->atoms.head[x],
1229						       &right->atoms.head[
1230							xk_to_y(x, k)]);
1231				if (r)
1232					return r;
1233				if (!same)
1234					break;
1235				x++;
1236			}
1237			kd_column[k] = x;
1238
1239			if (x == left->atoms.len
1240			    && xk_to_y(x, k) == right->atoms.len) {
1241				/* Found a path */
1242				backtrack_d = d;
1243				backtrack_k = k;
1244				debug("Reached the end at d = %d, k = %d\n",
1245				      backtrack_d, backtrack_k);
1246				break;
1247			}
1248		}
1249
1250		if (backtrack_d >= 0)
1251			break;
1252	}
1253
1254	debug_dump_myers_graph(left, right, kd_origin, NULL, 0, NULL, 0);
1255
1256	/* backtrack. A matrix spanning from start to end of the file is ready:
1257	 *
1258	 *      | d=   0   1   2   3   4
1259	 *  ----+---------------------------------
1260	 *  k=  |
1261	 *   3  |
1262	 *      |
1263	 *   2  |             2,0
1264	 *      |            /
1265	 *   1  |         1,0     4,3
1266	 *      |        /       /   \
1267	 *   0  |  -->0,0     3,3     4,4 --> backtrack_d = 4, backtrack_k = 0
1268	 *      |        \   /   \
1269	 *  -1  |         0,1     3,4
1270	 *      |            \
1271	 *  -2  |             0,2
1272	 *      |
1273	 *
1274	 * From (4,4) backwards, find the previous position that is the largest, and remember it.
1275	 *
1276	 */
1277	for (d = backtrack_d, k = backtrack_k; d >= 0; d--) {
1278		x = kd_column[k];
1279		y = xk_to_y(x, k);
1280
1281		/* When the best position is identified, remember it for that
1282		 * kd_column.
1283		 * That kd_column is no longer needed otherwise, so just
1284		 * re-purpose kd_column[0] = x and kd_column[1] = y,
1285		 * so that there is no need to allocate more memory.
1286		 */
1287		kd_column[0] = x;
1288		kd_column[1] = y;
1289		debug("Backtrack d=%d: xy=(%d, %d)\n",
1290		      d, kd_column[0], kd_column[1]);
1291
1292		/* Don't access memory before kd_buf */
1293		if (d == 0)
1294			break;
1295		int *kd_prev_column = kd_column - kd_len;
1296
1297		/* When y == 0, backtracking downwards (k-1) is the only way.
1298		 * When x == 0, backtracking upwards (k+1) is the only way.
1299		 *
1300		 *      | d=   0   1   2   3   4
1301		 *  ----+---------------------------------
1302		 *  k=  |
1303		 *   3  |
1304		 *      |                ..y == 0
1305		 *   2  |             2,0
1306		 *      |            /
1307		 *   1  |         1,0     4,3
1308		 *      |        /       /   \
1309		 *   0  |  -->0,0     3,3     4,4 --> backtrack_d = 4,
1310		 *      |        \   /   \            backtrack_k = 0
1311		 *  -1  |         0,1     3,4
1312		 *      |            \
1313		 *  -2  |             0,2__
1314		 *      |                  x == 0
1315		 */
1316		if (y == 0
1317		    || (x > 0
1318			&& kd_prev_column[k - 1] >= kd_prev_column[k + 1])) {
1319			k = k - 1;
1320			debug("prev k=k-1=%d x=%d y=%d\n",
1321			      k, kd_prev_column[k],
1322			      xk_to_y(kd_prev_column[k], k));
1323		} else {
1324			k = k + 1;
1325			debug("prev k=k+1=%d x=%d y=%d\n",
1326			      k, kd_prev_column[k],
1327			      xk_to_y(kd_prev_column[k], k));
1328		}
1329		kd_column = kd_prev_column;
1330	}
1331
1332	/* Forwards again, this time recording the diff chunks.
1333	 * Definitely start from 0,0. kd_column[0] may actually point to the
1334	 * bottom of a snake starting at 0,0 */
1335	x = 0;
1336	y = 0;
1337
1338	kd_column = kd_origin;
1339	for (d = 0; d <= backtrack_d; d++, kd_column += kd_len) {
1340		int next_x = kd_column[0];
1341		int next_y = kd_column[1];
1342		debug("Forward track from xy(%d,%d) to xy(%d,%d)\n",
1343		      x, y, next_x, next_y);
1344
1345		struct diff_atom *left_atom = &left->atoms.head[x];
1346		int left_section_len = next_x - x;
1347		struct diff_atom *right_atom = &right->atoms.head[y];
1348		int right_section_len = next_y - y;
1349
1350		rc = ENOMEM;
1351		if (left_section_len && right_section_len) {
1352			/* This must be a snake slide.
1353			 * Snake slides have a straight line leading into them
1354			 * (except when starting at (0,0)). Find out whether the
1355			 * lead-in is horizontal or vertical:
1356			 *
1357			 *     left
1358			 *  ---------->
1359			 *  |
1360			 * r|   o-o        o
1361			 * i|      \       |
1362			 * g|       o      o
1363			 * h|        \      \
1364			 * t|         o      o
1365			 *  v
1366			 *
1367			 * If left_section_len > right_section_len, the lead-in
1368			 * is horizontal, meaning first remove one atom from the
1369			 * left before sliding down the snake.
1370			 * If right_section_len > left_section_len, the lead-in
1371			 * is vetical, so add one atom from the right before
1372			 * sliding down the snake. */
1373			if (left_section_len == right_section_len + 1) {
1374				if (!diff_state_add_chunk(state, true,
1375							  left_atom, 1,
1376							  right_atom, 0))
1377					goto return_rc;
1378				left_atom++;
1379				left_section_len--;
1380			} else if (right_section_len == left_section_len + 1) {
1381				if (!diff_state_add_chunk(state, true,
1382							  left_atom, 0,
1383							  right_atom, 1))
1384					goto return_rc;
1385				right_atom++;
1386				right_section_len--;
1387			} else if (left_section_len != right_section_len) {
1388				/* The numbers are making no sense. Should never
1389				 * happen. */
1390				rc = DIFF_RC_USE_DIFF_ALGO_FALLBACK;
1391				goto return_rc;
1392			}
1393
1394			if (!diff_state_add_chunk(state, true,
1395						  left_atom, left_section_len,
1396						  right_atom,
1397						  right_section_len))
1398				goto return_rc;
1399		} else if (left_section_len && !right_section_len) {
1400			/* Only left atoms and none on the right, they form a
1401			 * "minus" chunk, then. */
1402			if (!diff_state_add_chunk(state, true,
1403						  left_atom, left_section_len,
1404						  right_atom, 0))
1405				goto return_rc;
1406		} else if (!left_section_len && right_section_len) {
1407			/* No left atoms, only atoms on the right, they form a
1408			 * "plus" chunk, then. */
1409			if (!diff_state_add_chunk(state, true,
1410						  left_atom, 0,
1411						  right_atom,
1412						  right_section_len))
1413				goto return_rc;
1414		}
1415
1416		x = next_x;
1417		y = next_y;
1418	}
1419
1420	rc = DIFF_RC_OK;
1421
1422return_rc:
1423	debug("** END %s rc=%d\n", __func__, rc);
1424	return rc;
1425}
1426