1/*
2 * divsufsort.c for libdivsufsort-lite
3 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person
6 * obtaining a copy of this software and associated documentation
7 * files (the "Software"), to deal in the Software without
8 * restriction, including without limitation the rights to use,
9 * copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following
12 * conditions:
13 *
14 * The above copyright notice and this permission notice shall be
15 * included in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 * OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27/*- Compiler specifics -*/
28#ifdef __clang__
29#pragma clang diagnostic ignored "-Wshorten-64-to-32"
30#endif
31
32#if defined(_MSC_VER)
33#  pragma warning(disable : 4244)
34#  pragma warning(disable : 4127)    /* C4127 : Condition expression is constant */
35#endif
36
37
38/*- Dependencies -*/
39#include <assert.h>
40#include <stdio.h>
41#include <stdlib.h>
42
43#include "divsufsort.h"
44
45/*- Constants -*/
46#if defined(INLINE)
47# undef INLINE
48#endif
49#if !defined(INLINE)
50# define INLINE __inline
51#endif
52#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
53# undef ALPHABET_SIZE
54#endif
55#if !defined(ALPHABET_SIZE)
56# define ALPHABET_SIZE (256)
57#endif
58#define BUCKET_A_SIZE (ALPHABET_SIZE)
59#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
60#if defined(SS_INSERTIONSORT_THRESHOLD)
61# if SS_INSERTIONSORT_THRESHOLD < 1
62#  undef SS_INSERTIONSORT_THRESHOLD
63#  define SS_INSERTIONSORT_THRESHOLD (1)
64# endif
65#else
66# define SS_INSERTIONSORT_THRESHOLD (8)
67#endif
68#if defined(SS_BLOCKSIZE)
69# if SS_BLOCKSIZE < 0
70#  undef SS_BLOCKSIZE
71#  define SS_BLOCKSIZE (0)
72# elif 32768 <= SS_BLOCKSIZE
73#  undef SS_BLOCKSIZE
74#  define SS_BLOCKSIZE (32767)
75# endif
76#else
77# define SS_BLOCKSIZE (1024)
78#endif
79/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
80#if SS_BLOCKSIZE == 0
81# define SS_MISORT_STACKSIZE (96)
82#elif SS_BLOCKSIZE <= 4096
83# define SS_MISORT_STACKSIZE (16)
84#else
85# define SS_MISORT_STACKSIZE (24)
86#endif
87#define SS_SMERGE_STACKSIZE (32)
88#define TR_INSERTIONSORT_THRESHOLD (8)
89#define TR_STACKSIZE (64)
90
91
92/*- Macros -*/
93#ifndef SWAP
94# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
95#endif /* SWAP */
96#ifndef MIN
97# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
98#endif /* MIN */
99#ifndef MAX
100# define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
101#endif /* MAX */
102#define STACK_PUSH(_a, _b, _c, _d)\
103  do {\
104    assert(ssize < STACK_SIZE);\
105    stack[ssize].a = (_a), stack[ssize].b = (_b),\
106    stack[ssize].c = (_c), stack[ssize++].d = (_d);\
107  } while(0)
108#define STACK_PUSH5(_a, _b, _c, _d, _e)\
109  do {\
110    assert(ssize < STACK_SIZE);\
111    stack[ssize].a = (_a), stack[ssize].b = (_b),\
112    stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
113  } while(0)
114#define STACK_POP(_a, _b, _c, _d)\
115  do {\
116    assert(0 <= ssize);\
117    if(ssize == 0) { return; }\
118    (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
119    (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
120  } while(0)
121#define STACK_POP5(_a, _b, _c, _d, _e)\
122  do {\
123    assert(0 <= ssize);\
124    if(ssize == 0) { return; }\
125    (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
126    (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
127  } while(0)
128#define BUCKET_A(_c0) bucket_A[(_c0)]
129#if ALPHABET_SIZE == 256
130#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
131#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
132#else
133#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
134#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
135#endif
136
137
138/*- Private Functions -*/
139
140static const int lg_table[256]= {
141 -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
142  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
143  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
144  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
145  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
146  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
147  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
148  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
149};
150
151#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
152
153static INLINE
154int
155ss_ilg(int n) {
156#if SS_BLOCKSIZE == 0
157  return (n & 0xffff0000) ?
158          ((n & 0xff000000) ?
159            24 + lg_table[(n >> 24) & 0xff] :
160            16 + lg_table[(n >> 16) & 0xff]) :
161          ((n & 0x0000ff00) ?
162             8 + lg_table[(n >>  8) & 0xff] :
163             0 + lg_table[(n >>  0) & 0xff]);
164#elif SS_BLOCKSIZE < 256
165  return lg_table[n];
166#else
167  return (n & 0xff00) ?
168          8 + lg_table[(n >> 8) & 0xff] :
169          0 + lg_table[(n >> 0) & 0xff];
170#endif
171}
172
173#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
174
175#if SS_BLOCKSIZE != 0
176
177static const int sqq_table[256] = {
178  0,  16,  22,  27,  32,  35,  39,  42,  45,  48,  50,  53,  55,  57,  59,  61,
179 64,  65,  67,  69,  71,  73,  75,  76,  78,  80,  81,  83,  84,  86,  87,  89,
180 90,  91,  93,  94,  96,  97,  98,  99, 101, 102, 103, 104, 106, 107, 108, 109,
181110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
182128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
183143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
184156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
185169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
186181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
187192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
188202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
189212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
190221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
191230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
192239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
193247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
194};
195
196static INLINE
197int
198ss_isqrt(int x) {
199  int y, e;
200
201  if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
202  e = (x & 0xffff0000) ?
203        ((x & 0xff000000) ?
204          24 + lg_table[(x >> 24) & 0xff] :
205          16 + lg_table[(x >> 16) & 0xff]) :
206        ((x & 0x0000ff00) ?
207           8 + lg_table[(x >>  8) & 0xff] :
208           0 + lg_table[(x >>  0) & 0xff]);
209
210  if(e >= 16) {
211    y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
212    if(e >= 24) { y = (y + 1 + x / y) >> 1; }
213    y = (y + 1 + x / y) >> 1;
214  } else if(e >= 8) {
215    y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
216  } else {
217    return sqq_table[x] >> 4;
218  }
219
220  return (x < (y * y)) ? y - 1 : y;
221}
222
223#endif /* SS_BLOCKSIZE != 0 */
224
225
226/*---------------------------------------------------------------------------*/
227
228/* Compares two suffixes. */
229static INLINE
230int
231ss_compare(const unsigned char *T,
232           const int *p1, const int *p2,
233           int depth) {
234  const unsigned char *U1, *U2, *U1n, *U2n;
235
236  for(U1 = T + depth + *p1,
237      U2 = T + depth + *p2,
238      U1n = T + *(p1 + 1) + 2,
239      U2n = T + *(p2 + 1) + 2;
240      (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
241      ++U1, ++U2) {
242  }
243
244  return U1 < U1n ?
245        (U2 < U2n ? *U1 - *U2 : 1) :
246        (U2 < U2n ? -1 : 0);
247}
248
249
250/*---------------------------------------------------------------------------*/
251
252#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
253
254/* Insertionsort for small size groups */
255static
256void
257ss_insertionsort(const unsigned char *T, const int *PA,
258                 int *first, int *last, int depth) {
259  int *i, *j;
260  int t;
261  int r;
262
263  for(i = last - 2; first <= i; --i) {
264    for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
265      do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
266      if(last <= j) { break; }
267    }
268    if(r == 0) { *j = ~*j; }
269    *(j - 1) = t;
270  }
271}
272
273#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
274
275
276/*---------------------------------------------------------------------------*/
277
278#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
279
280static INLINE
281void
282ss_fixdown(const unsigned char *Td, const int *PA,
283           int *SA, int i, int size) {
284  int j, k;
285  int v;
286  int c, d, e;
287
288  for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
289    d = Td[PA[SA[k = j++]]];
290    if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }
291    if(d <= c) { break; }
292  }
293  SA[i] = v;
294}
295
296/* Simple top-down heapsort. */
297static
298void
299ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) {
300  int i, m;
301  int t;
302
303  m = size;
304  if((size % 2) == 0) {
305    m--;
306    if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
307  }
308
309  for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }
310  if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
311  for(i = m - 1; 0 < i; --i) {
312    t = SA[0], SA[0] = SA[i];
313    ss_fixdown(Td, PA, SA, 0, i);
314    SA[i] = t;
315  }
316}
317
318
319/*---------------------------------------------------------------------------*/
320
321/* Returns the median of three elements. */
322static INLINE
323int *
324ss_median3(const unsigned char *Td, const int *PA,
325           int *v1, int *v2, int *v3) {
326  int *t;
327  if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); }
328  if(Td[PA[*v2]] > Td[PA[*v3]]) {
329    if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; }
330    else { return v3; }
331  }
332  return v2;
333}
334
335/* Returns the median of five elements. */
336static INLINE
337int *
338ss_median5(const unsigned char *Td, const int *PA,
339           int *v1, int *v2, int *v3, int *v4, int *v5) {
340  int *t;
341  if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
342  if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
343  if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
344  if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
345  if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
346  if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
347  return v3;
348}
349
350/* Returns the pivot element. */
351static INLINE
352int *
353ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) {
354  int *middle;
355  int t;
356
357  t = last - first;
358  middle = first + t / 2;
359
360  if(t <= 512) {
361    if(t <= 32) {
362      return ss_median3(Td, PA, first, middle, last - 1);
363    } else {
364      t >>= 2;
365      return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1);
366    }
367  }
368  t >>= 3;
369  first  = ss_median3(Td, PA, first, first + t, first + (t << 1));
370  middle = ss_median3(Td, PA, middle - t, middle, middle + t);
371  last   = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
372  return ss_median3(Td, PA, first, middle, last);
373}
374
375
376/*---------------------------------------------------------------------------*/
377
378/* Binary partition for substrings. */
379static INLINE
380int *
381ss_partition(const int *PA,
382                    int *first, int *last, int depth) {
383  int *a, *b;
384  int t;
385  for(a = first - 1, b = last;;) {
386    for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
387    for(; (a < --b) && ((PA[*b] + depth) <  (PA[*b + 1] + 1));) { }
388    if(b <= a) { break; }
389    t = ~*b;
390    *b = *a;
391    *a = t;
392  }
393  if(first < a) { *first = ~*first; }
394  return a;
395}
396
397/* Multikey introsort for medium size groups. */
398static
399void
400ss_mintrosort(const unsigned char *T, const int *PA,
401              int *first, int *last,
402              int depth) {
403#define STACK_SIZE SS_MISORT_STACKSIZE
404  struct { int *a, *b, c; int d; } stack[STACK_SIZE];
405  const unsigned char *Td;
406  int *a, *b, *c, *d, *e, *f;
407  int s, t;
408  int ssize;
409  int limit;
410  int v, x = 0;
411
412  for(ssize = 0, limit = ss_ilg(last - first);;) {
413
414    if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
415#if 1 < SS_INSERTIONSORT_THRESHOLD
416      if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
417#endif
418      STACK_POP(first, last, depth, limit);
419      continue;
420    }
421
422    Td = T + depth;
423    if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
424    if(limit < 0) {
425      for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
426        if((x = Td[PA[*a]]) != v) {
427          if(1 < (a - first)) { break; }
428          v = x;
429          first = a;
430        }
431      }
432      if(Td[PA[*first] - 1] < v) {
433        first = ss_partition(PA, first, a, depth);
434      }
435      if((a - first) <= (last - a)) {
436        if(1 < (a - first)) {
437          STACK_PUSH(a, last, depth, -1);
438          last = a, depth += 1, limit = ss_ilg(a - first);
439        } else {
440          first = a, limit = -1;
441        }
442      } else {
443        if(1 < (last - a)) {
444          STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
445          first = a, limit = -1;
446        } else {
447          last = a, depth += 1, limit = ss_ilg(a - first);
448        }
449      }
450      continue;
451    }
452
453    /* choose pivot */
454    a = ss_pivot(Td, PA, first, last);
455    v = Td[PA[*a]];
456    SWAP(*first, *a);
457
458    /* partition */
459    for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
460    if(((a = b) < last) && (x < v)) {
461      for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
462        if(x == v) { SWAP(*b, *a); ++a; }
463      }
464    }
465    for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
466    if((b < (d = c)) && (x > v)) {
467      for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
468        if(x == v) { SWAP(*c, *d); --d; }
469      }
470    }
471    for(; b < c;) {
472      SWAP(*b, *c);
473      for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
474        if(x == v) { SWAP(*b, *a); ++a; }
475      }
476      for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
477        if(x == v) { SWAP(*c, *d); --d; }
478      }
479    }
480
481    if(a <= d) {
482      c = b - 1;
483
484      if((s = a - first) > (t = b - a)) { s = t; }
485      for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
486      if((s = d - c) > (t = last - d - 1)) { s = t; }
487      for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
488
489      a = first + (b - a), c = last - (d - c);
490      b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
491
492      if((a - first) <= (last - c)) {
493        if((last - c) <= (c - b)) {
494          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
495          STACK_PUSH(c, last, depth, limit);
496          last = a;
497        } else if((a - first) <= (c - b)) {
498          STACK_PUSH(c, last, depth, limit);
499          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
500          last = a;
501        } else {
502          STACK_PUSH(c, last, depth, limit);
503          STACK_PUSH(first, a, depth, limit);
504          first = b, last = c, depth += 1, limit = ss_ilg(c - b);
505        }
506      } else {
507        if((a - first) <= (c - b)) {
508          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
509          STACK_PUSH(first, a, depth, limit);
510          first = c;
511        } else if((last - c) <= (c - b)) {
512          STACK_PUSH(first, a, depth, limit);
513          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
514          first = c;
515        } else {
516          STACK_PUSH(first, a, depth, limit);
517          STACK_PUSH(c, last, depth, limit);
518          first = b, last = c, depth += 1, limit = ss_ilg(c - b);
519        }
520      }
521    } else {
522      limit += 1;
523      if(Td[PA[*first] - 1] < v) {
524        first = ss_partition(PA, first, last, depth);
525        limit = ss_ilg(last - first);
526      }
527      depth += 1;
528    }
529  }
530#undef STACK_SIZE
531}
532
533#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
534
535
536/*---------------------------------------------------------------------------*/
537
538#if SS_BLOCKSIZE != 0
539
540static INLINE
541void
542ss_blockswap(int *a, int *b, int n) {
543  int t;
544  for(; 0 < n; --n, ++a, ++b) {
545    t = *a, *a = *b, *b = t;
546  }
547}
548
549static INLINE
550void
551ss_rotate(int *first, int *middle, int *last) {
552  int *a, *b, t;
553  int l, r;
554  l = middle - first, r = last - middle;
555  for(; (0 < l) && (0 < r);) {
556    if(l == r) { ss_blockswap(first, middle, l); break; }
557    if(l < r) {
558      a = last - 1, b = middle - 1;
559      t = *a;
560      do {
561        *a-- = *b, *b-- = *a;
562        if(b < first) {
563          *a = t;
564          last = a;
565          if((r -= l + 1) <= l) { break; }
566          a -= 1, b = middle - 1;
567          t = *a;
568        }
569      } while(1);
570    } else {
571      a = first, b = middle;
572      t = *a;
573      do {
574        *a++ = *b, *b++ = *a;
575        if(last <= b) {
576          *a = t;
577          first = a + 1;
578          if((l -= r + 1) <= r) { break; }
579          a += 1, b = middle;
580          t = *a;
581        }
582      } while(1);
583    }
584  }
585}
586
587
588/*---------------------------------------------------------------------------*/
589
590static
591void
592ss_inplacemerge(const unsigned char *T, const int *PA,
593                int *first, int *middle, int *last,
594                int depth) {
595  const int *p;
596  int *a, *b;
597  int len, half;
598  int q, r;
599  int x;
600
601  for(;;) {
602    if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
603    else                { x = 0; p = PA +  *(last - 1); }
604    for(a = first, len = middle - first, half = len >> 1, r = -1;
605        0 < len;
606        len = half, half >>= 1) {
607      b = a + half;
608      q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
609      if(q < 0) {
610        a = b + 1;
611        half -= (len & 1) ^ 1;
612      } else {
613        r = q;
614      }
615    }
616    if(a < middle) {
617      if(r == 0) { *a = ~*a; }
618      ss_rotate(a, middle, last);
619      last -= middle - a;
620      middle = a;
621      if(first == middle) { break; }
622    }
623    --last;
624    if(x != 0) { while(*--last < 0) { } }
625    if(middle == last) { break; }
626  }
627}
628
629
630/*---------------------------------------------------------------------------*/
631
632/* Merge-forward with internal buffer. */
633static
634void
635ss_mergeforward(const unsigned char *T, const int *PA,
636                int *first, int *middle, int *last,
637                int *buf, int depth) {
638  int *a, *b, *c, *bufend;
639  int t;
640  int r;
641
642  bufend = buf + (middle - first) - 1;
643  ss_blockswap(buf, first, middle - first);
644
645  for(t = *(a = first), b = buf, c = middle;;) {
646    r = ss_compare(T, PA + *b, PA + *c, depth);
647    if(r < 0) {
648      do {
649        *a++ = *b;
650        if(bufend <= b) { *bufend = t; return; }
651        *b++ = *a;
652      } while(*b < 0);
653    } else if(r > 0) {
654      do {
655        *a++ = *c, *c++ = *a;
656        if(last <= c) {
657          while(b < bufend) { *a++ = *b, *b++ = *a; }
658          *a = *b, *b = t;
659          return;
660        }
661      } while(*c < 0);
662    } else {
663      *c = ~*c;
664      do {
665        *a++ = *b;
666        if(bufend <= b) { *bufend = t; return; }
667        *b++ = *a;
668      } while(*b < 0);
669
670      do {
671        *a++ = *c, *c++ = *a;
672        if(last <= c) {
673          while(b < bufend) { *a++ = *b, *b++ = *a; }
674          *a = *b, *b = t;
675          return;
676        }
677      } while(*c < 0);
678    }
679  }
680}
681
682/* Merge-backward with internal buffer. */
683static
684void
685ss_mergebackward(const unsigned char *T, const int *PA,
686                 int *first, int *middle, int *last,
687                 int *buf, int depth) {
688  const int *p1, *p2;
689  int *a, *b, *c, *bufend;
690  int t;
691  int r;
692  int x;
693
694  bufend = buf + (last - middle) - 1;
695  ss_blockswap(buf, middle, last - middle);
696
697  x = 0;
698  if(*bufend < 0)       { p1 = PA + ~*bufend; x |= 1; }
699  else                  { p1 = PA +  *bufend; }
700  if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
701  else                  { p2 = PA +  *(middle - 1); }
702  for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
703    r = ss_compare(T, p1, p2, depth);
704    if(0 < r) {
705      if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
706      *a-- = *b;
707      if(b <= buf) { *buf = t; break; }
708      *b-- = *a;
709      if(*b < 0) { p1 = PA + ~*b; x |= 1; }
710      else       { p1 = PA +  *b; }
711    } else if(r < 0) {
712      if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
713      *a-- = *c, *c-- = *a;
714      if(c < first) {
715        while(buf < b) { *a-- = *b, *b-- = *a; }
716        *a = *b, *b = t;
717        break;
718      }
719      if(*c < 0) { p2 = PA + ~*c; x |= 2; }
720      else       { p2 = PA +  *c; }
721    } else {
722      if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
723      *a-- = ~*b;
724      if(b <= buf) { *buf = t; break; }
725      *b-- = *a;
726      if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
727      *a-- = *c, *c-- = *a;
728      if(c < first) {
729        while(buf < b) { *a-- = *b, *b-- = *a; }
730        *a = *b, *b = t;
731        break;
732      }
733      if(*b < 0) { p1 = PA + ~*b; x |= 1; }
734      else       { p1 = PA +  *b; }
735      if(*c < 0) { p2 = PA + ~*c; x |= 2; }
736      else       { p2 = PA +  *c; }
737    }
738  }
739}
740
741/* D&C based merge. */
742static
743void
744ss_swapmerge(const unsigned char *T, const int *PA,
745             int *first, int *middle, int *last,
746             int *buf, int bufsize, int depth) {
747#define STACK_SIZE SS_SMERGE_STACKSIZE
748#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
749#define MERGE_CHECK(a, b, c)\
750  do {\
751    if(((c) & 1) ||\
752       (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
753      *(a) = ~*(a);\
754    }\
755    if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
756      *(b) = ~*(b);\
757    }\
758  } while(0)
759  struct { int *a, *b, *c; int d; } stack[STACK_SIZE];
760  int *l, *r, *lm, *rm;
761  int m, len, half;
762  int ssize;
763  int check, next;
764
765  for(check = 0, ssize = 0;;) {
766    if((last - middle) <= bufsize) {
767      if((first < middle) && (middle < last)) {
768        ss_mergebackward(T, PA, first, middle, last, buf, depth);
769      }
770      MERGE_CHECK(first, last, check);
771      STACK_POP(first, middle, last, check);
772      continue;
773    }
774
775    if((middle - first) <= bufsize) {
776      if(first < middle) {
777        ss_mergeforward(T, PA, first, middle, last, buf, depth);
778      }
779      MERGE_CHECK(first, last, check);
780      STACK_POP(first, middle, last, check);
781      continue;
782    }
783
784    for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
785        0 < len;
786        len = half, half >>= 1) {
787      if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
788                       PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
789        m += half + 1;
790        half -= (len & 1) ^ 1;
791      }
792    }
793
794    if(0 < m) {
795      lm = middle - m, rm = middle + m;
796      ss_blockswap(lm, middle, m);
797      l = r = middle, next = 0;
798      if(rm < last) {
799        if(*rm < 0) {
800          *rm = ~*rm;
801          if(first < lm) { for(; *--l < 0;) { } next |= 4; }
802          next |= 1;
803        } else if(first < lm) {
804          for(; *r < 0; ++r) { }
805          next |= 2;
806        }
807      }
808
809      if((l - first) <= (last - r)) {
810        STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
811        middle = lm, last = l, check = (check & 3) | (next & 4);
812      } else {
813        if((next & 2) && (r == middle)) { next ^= 6; }
814        STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
815        first = r, middle = rm, check = (next & 3) | (check & 4);
816      }
817    } else {
818      if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
819        *middle = ~*middle;
820      }
821      MERGE_CHECK(first, last, check);
822      STACK_POP(first, middle, last, check);
823    }
824  }
825#undef STACK_SIZE
826}
827
828#endif /* SS_BLOCKSIZE != 0 */
829
830
831/*---------------------------------------------------------------------------*/
832
833/* Substring sort */
834static
835void
836sssort(const unsigned char *T, const int *PA,
837       int *first, int *last,
838       int *buf, int bufsize,
839       int depth, int n, int lastsuffix) {
840  int *a;
841#if SS_BLOCKSIZE != 0
842  int *b, *middle, *curbuf;
843  int j, k, curbufsize, limit;
844#endif
845  int i;
846
847  if(lastsuffix != 0) { ++first; }
848
849#if SS_BLOCKSIZE == 0
850  ss_mintrosort(T, PA, first, last, depth);
851#else
852  if((bufsize < SS_BLOCKSIZE) &&
853      (bufsize < (last - first)) &&
854      (bufsize < (limit = ss_isqrt(last - first)))) {
855    if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
856    buf = middle = last - limit, bufsize = limit;
857  } else {
858    middle = last, limit = 0;
859  }
860  for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
861#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
862    ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
863#elif 1 < SS_BLOCKSIZE
864    ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
865#endif
866    curbufsize = last - (a + SS_BLOCKSIZE);
867    curbuf = a + SS_BLOCKSIZE;
868    if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
869    for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
870      ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
871    }
872  }
873#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
874  ss_mintrosort(T, PA, a, middle, depth);
875#elif 1 < SS_BLOCKSIZE
876  ss_insertionsort(T, PA, a, middle, depth);
877#endif
878  for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
879    if(i & 1) {
880      ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
881      a -= k;
882    }
883  }
884  if(limit != 0) {
885#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
886    ss_mintrosort(T, PA, middle, last, depth);
887#elif 1 < SS_BLOCKSIZE
888    ss_insertionsort(T, PA, middle, last, depth);
889#endif
890    ss_inplacemerge(T, PA, first, middle, last, depth);
891  }
892#endif
893
894  if(lastsuffix != 0) {
895    /* Insert last type B* suffix. */
896    int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
897    for(a = first, i = *(first - 1);
898        (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
899        ++a) {
900      *(a - 1) = *a;
901    }
902    *(a - 1) = i;
903  }
904}
905
906
907/*---------------------------------------------------------------------------*/
908
909static INLINE
910int
911tr_ilg(int n) {
912  return (n & 0xffff0000) ?
913          ((n & 0xff000000) ?
914            24 + lg_table[(n >> 24) & 0xff] :
915            16 + lg_table[(n >> 16) & 0xff]) :
916          ((n & 0x0000ff00) ?
917             8 + lg_table[(n >>  8) & 0xff] :
918             0 + lg_table[(n >>  0) & 0xff]);
919}
920
921
922/*---------------------------------------------------------------------------*/
923
924/* Simple insertionsort for small size groups. */
925static
926void
927tr_insertionsort(const int *ISAd, int *first, int *last) {
928  int *a, *b;
929  int t, r;
930
931  for(a = first + 1; a < last; ++a) {
932    for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
933      do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
934      if(b < first) { break; }
935    }
936    if(r == 0) { *b = ~*b; }
937    *(b + 1) = t;
938  }
939}
940
941
942/*---------------------------------------------------------------------------*/
943
944static INLINE
945void
946tr_fixdown(const int *ISAd, int *SA, int i, int size) {
947  int j, k;
948  int v;
949  int c, d, e;
950
951  for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
952    d = ISAd[SA[k = j++]];
953    if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
954    if(d <= c) { break; }
955  }
956  SA[i] = v;
957}
958
959/* Simple top-down heapsort. */
960static
961void
962tr_heapsort(const int *ISAd, int *SA, int size) {
963  int i, m;
964  int t;
965
966  m = size;
967  if((size % 2) == 0) {
968    m--;
969    if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
970  }
971
972  for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
973  if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
974  for(i = m - 1; 0 < i; --i) {
975    t = SA[0], SA[0] = SA[i];
976    tr_fixdown(ISAd, SA, 0, i);
977    SA[i] = t;
978  }
979}
980
981
982/*---------------------------------------------------------------------------*/
983
984/* Returns the median of three elements. */
985static INLINE
986int *
987tr_median3(const int *ISAd, int *v1, int *v2, int *v3) {
988  int *t;
989  if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); }
990  if(ISAd[*v2] > ISAd[*v3]) {
991    if(ISAd[*v1] > ISAd[*v3]) { return v1; }
992    else { return v3; }
993  }
994  return v2;
995}
996
997/* Returns the median of five elements. */
998static INLINE
999int *
1000tr_median5(const int *ISAd,
1001           int *v1, int *v2, int *v3, int *v4, int *v5) {
1002  int *t;
1003  if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
1004  if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
1005  if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
1006  if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
1007  if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
1008  if(ISAd[*v3] > ISAd[*v4]) { return v4; }
1009  return v3;
1010}
1011
1012/* Returns the pivot element. */
1013static INLINE
1014int *
1015tr_pivot(const int *ISAd, int *first, int *last) {
1016  int *middle;
1017  int t;
1018
1019  t = last - first;
1020  middle = first + t / 2;
1021
1022  if(t <= 512) {
1023    if(t <= 32) {
1024      return tr_median3(ISAd, first, middle, last - 1);
1025    } else {
1026      t >>= 2;
1027      return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
1028    }
1029  }
1030  t >>= 3;
1031  first  = tr_median3(ISAd, first, first + t, first + (t << 1));
1032  middle = tr_median3(ISAd, middle - t, middle, middle + t);
1033  last   = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
1034  return tr_median3(ISAd, first, middle, last);
1035}
1036
1037
1038/*---------------------------------------------------------------------------*/
1039
1040typedef struct _trbudget_t trbudget_t;
1041struct _trbudget_t {
1042  int chance;
1043  int remain;
1044  int incval;
1045  int count;
1046};
1047
1048static INLINE
1049void
1050trbudget_init(trbudget_t *budget, int chance, int incval) {
1051  budget->chance = chance;
1052  budget->remain = budget->incval = incval;
1053}
1054
1055static INLINE
1056int
1057trbudget_check(trbudget_t *budget, int size) {
1058  if(size <= budget->remain) { budget->remain -= size; return 1; }
1059  if(budget->chance == 0) { budget->count += size; return 0; }
1060  budget->remain += budget->incval - size;
1061  budget->chance -= 1;
1062  return 1;
1063}
1064
1065
1066/*---------------------------------------------------------------------------*/
1067
1068static INLINE
1069void
1070tr_partition(const int *ISAd,
1071             int *first, int *middle, int *last,
1072             int **pa, int **pb, int v) {
1073  int *a, *b, *c, *d, *e, *f;
1074  int t, s;
1075  int x = 0;
1076
1077  for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
1078  if(((a = b) < last) && (x < v)) {
1079    for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
1080      if(x == v) { SWAP(*b, *a); ++a; }
1081    }
1082  }
1083  for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
1084  if((b < (d = c)) && (x > v)) {
1085    for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
1086      if(x == v) { SWAP(*c, *d); --d; }
1087    }
1088  }
1089  for(; b < c;) {
1090    SWAP(*b, *c);
1091    for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
1092      if(x == v) { SWAP(*b, *a); ++a; }
1093    }
1094    for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
1095      if(x == v) { SWAP(*c, *d); --d; }
1096    }
1097  }
1098
1099  if(a <= d) {
1100    c = b - 1;
1101    if((s = a - first) > (t = b - a)) { s = t; }
1102    for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
1103    if((s = d - c) > (t = last - d - 1)) { s = t; }
1104    for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
1105    first += (b - a), last -= (d - c);
1106  }
1107  *pa = first, *pb = last;
1108}
1109
1110static
1111void
1112tr_copy(int *ISA, const int *SA,
1113        int *first, int *a, int *b, int *last,
1114        int depth) {
1115  /* sort suffixes of middle partition
1116     by using sorted order of suffixes of left and right partition. */
1117  int *c, *d, *e;
1118  int s, v;
1119
1120  v = b - SA - 1;
1121  for(c = first, d = a - 1; c <= d; ++c) {
1122    if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1123      *++d = s;
1124      ISA[s] = d - SA;
1125    }
1126  }
1127  for(c = last - 1, e = d + 1, d = b; e < d; --c) {
1128    if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1129      *--d = s;
1130      ISA[s] = d - SA;
1131    }
1132  }
1133}
1134
1135static
1136void
1137tr_partialcopy(int *ISA, const int *SA,
1138               int *first, int *a, int *b, int *last,
1139               int depth) {
1140  int *c, *d, *e;
1141  int s, v;
1142  int rank, lastrank, newrank = -1;
1143
1144  v = b - SA - 1;
1145  lastrank = -1;
1146  for(c = first, d = a - 1; c <= d; ++c) {
1147    if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1148      *++d = s;
1149      rank = ISA[s + depth];
1150      if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
1151      ISA[s] = newrank;
1152    }
1153  }
1154
1155  lastrank = -1;
1156  for(e = d; first <= e; --e) {
1157    rank = ISA[*e];
1158    if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
1159    if(newrank != rank) { ISA[*e] = newrank; }
1160  }
1161
1162  lastrank = -1;
1163  for(c = last - 1, e = d + 1, d = b; e < d; --c) {
1164    if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1165      *--d = s;
1166      rank = ISA[s + depth];
1167      if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
1168      ISA[s] = newrank;
1169    }
1170  }
1171}
1172
1173static
1174void
1175tr_introsort(int *ISA, const int *ISAd,
1176             int *SA, int *first, int *last,
1177             trbudget_t *budget) {
1178#define STACK_SIZE TR_STACKSIZE
1179  struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE];
1180  int *a, *b, *c;
1181  int t;
1182  int v, x = 0;
1183  int incr = ISAd - ISA;
1184  int limit, next;
1185  int ssize, trlink = -1;
1186
1187  for(ssize = 0, limit = tr_ilg(last - first);;) {
1188
1189    if(limit < 0) {
1190      if(limit == -1) {
1191        /* tandem repeat partition */
1192        tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
1193
1194        /* update ranks */
1195        if(a < last) {
1196          for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
1197        }
1198        if(b < last) {
1199          for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
1200        }
1201
1202        /* push */
1203        if(1 < (b - a)) {
1204          STACK_PUSH5(NULL, a, b, 0, 0);
1205          STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
1206          trlink = ssize - 2;
1207        }
1208        if((a - first) <= (last - b)) {
1209          if(1 < (a - first)) {
1210            STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
1211            last = a, limit = tr_ilg(a - first);
1212          } else if(1 < (last - b)) {
1213            first = b, limit = tr_ilg(last - b);
1214          } else {
1215            STACK_POP5(ISAd, first, last, limit, trlink);
1216          }
1217        } else {
1218          if(1 < (last - b)) {
1219            STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
1220            first = b, limit = tr_ilg(last - b);
1221          } else if(1 < (a - first)) {
1222            last = a, limit = tr_ilg(a - first);
1223          } else {
1224            STACK_POP5(ISAd, first, last, limit, trlink);
1225          }
1226        }
1227      } else if(limit == -2) {
1228        /* tandem repeat copy */
1229        a = stack[--ssize].b, b = stack[ssize].c;
1230        if(stack[ssize].d == 0) {
1231          tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
1232        } else {
1233          if(0 <= trlink) { stack[trlink].d = -1; }
1234          tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
1235        }
1236        STACK_POP5(ISAd, first, last, limit, trlink);
1237      } else {
1238        /* sorted partition */
1239        if(0 <= *first) {
1240          a = first;
1241          do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
1242          first = a;
1243        }
1244        if(first < last) {
1245          a = first; do { *a = ~*a; } while(*++a < 0);
1246          next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
1247          if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
1248
1249          /* push */
1250          if(trbudget_check(budget, a - first)) {
1251            if((a - first) <= (last - a)) {
1252              STACK_PUSH5(ISAd, a, last, -3, trlink);
1253              ISAd += incr, last = a, limit = next;
1254            } else {
1255              if(1 < (last - a)) {
1256                STACK_PUSH5(ISAd + incr, first, a, next, trlink);
1257                first = a, limit = -3;
1258              } else {
1259                ISAd += incr, last = a, limit = next;
1260              }
1261            }
1262          } else {
1263            if(0 <= trlink) { stack[trlink].d = -1; }
1264            if(1 < (last - a)) {
1265              first = a, limit = -3;
1266            } else {
1267              STACK_POP5(ISAd, first, last, limit, trlink);
1268            }
1269          }
1270        } else {
1271          STACK_POP5(ISAd, first, last, limit, trlink);
1272        }
1273      }
1274      continue;
1275    }
1276
1277    if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
1278      tr_insertionsort(ISAd, first, last);
1279      limit = -3;
1280      continue;
1281    }
1282
1283    if(limit-- == 0) {
1284      tr_heapsort(ISAd, first, last - first);
1285      for(a = last - 1; first < a; a = b) {
1286        for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
1287      }
1288      limit = -3;
1289      continue;
1290    }
1291
1292    /* choose pivot */
1293    a = tr_pivot(ISAd, first, last);
1294    SWAP(*first, *a);
1295    v = ISAd[*first];
1296
1297    /* partition */
1298    tr_partition(ISAd, first, first + 1, last, &a, &b, v);
1299    if((last - first) != (b - a)) {
1300      next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
1301
1302      /* update ranks */
1303      for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
1304      if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
1305
1306      /* push */
1307      if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
1308        if((a - first) <= (last - b)) {
1309          if((last - b) <= (b - a)) {
1310            if(1 < (a - first)) {
1311              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1312              STACK_PUSH5(ISAd, b, last, limit, trlink);
1313              last = a;
1314            } else if(1 < (last - b)) {
1315              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1316              first = b;
1317            } else {
1318              ISAd += incr, first = a, last = b, limit = next;
1319            }
1320          } else if((a - first) <= (b - a)) {
1321            if(1 < (a - first)) {
1322              STACK_PUSH5(ISAd, b, last, limit, trlink);
1323              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1324              last = a;
1325            } else {
1326              STACK_PUSH5(ISAd, b, last, limit, trlink);
1327              ISAd += incr, first = a, last = b, limit = next;
1328            }
1329          } else {
1330            STACK_PUSH5(ISAd, b, last, limit, trlink);
1331            STACK_PUSH5(ISAd, first, a, limit, trlink);
1332            ISAd += incr, first = a, last = b, limit = next;
1333          }
1334        } else {
1335          if((a - first) <= (b - a)) {
1336            if(1 < (last - b)) {
1337              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1338              STACK_PUSH5(ISAd, first, a, limit, trlink);
1339              first = b;
1340            } else if(1 < (a - first)) {
1341              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1342              last = a;
1343            } else {
1344              ISAd += incr, first = a, last = b, limit = next;
1345            }
1346          } else if((last - b) <= (b - a)) {
1347            if(1 < (last - b)) {
1348              STACK_PUSH5(ISAd, first, a, limit, trlink);
1349              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1350              first = b;
1351            } else {
1352              STACK_PUSH5(ISAd, first, a, limit, trlink);
1353              ISAd += incr, first = a, last = b, limit = next;
1354            }
1355          } else {
1356            STACK_PUSH5(ISAd, first, a, limit, trlink);
1357            STACK_PUSH5(ISAd, b, last, limit, trlink);
1358            ISAd += incr, first = a, last = b, limit = next;
1359          }
1360        }
1361      } else {
1362        if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
1363        if((a - first) <= (last - b)) {
1364          if(1 < (a - first)) {
1365            STACK_PUSH5(ISAd, b, last, limit, trlink);
1366            last = a;
1367          } else if(1 < (last - b)) {
1368            first = b;
1369          } else {
1370            STACK_POP5(ISAd, first, last, limit, trlink);
1371          }
1372        } else {
1373          if(1 < (last - b)) {
1374            STACK_PUSH5(ISAd, first, a, limit, trlink);
1375            first = b;
1376          } else if(1 < (a - first)) {
1377            last = a;
1378          } else {
1379            STACK_POP5(ISAd, first, last, limit, trlink);
1380          }
1381        }
1382      }
1383    } else {
1384      if(trbudget_check(budget, last - first)) {
1385        limit = tr_ilg(last - first), ISAd += incr;
1386      } else {
1387        if(0 <= trlink) { stack[trlink].d = -1; }
1388        STACK_POP5(ISAd, first, last, limit, trlink);
1389      }
1390    }
1391  }
1392#undef STACK_SIZE
1393}
1394
1395
1396
1397/*---------------------------------------------------------------------------*/
1398
1399/* Tandem repeat sort */
1400static
1401void
1402trsort(int *ISA, int *SA, int n, int depth) {
1403  int *ISAd;
1404  int *first, *last;
1405  trbudget_t budget;
1406  int t, skip, unsorted;
1407
1408  trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
1409/*  trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
1410  for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
1411    first = SA;
1412    skip = 0;
1413    unsorted = 0;
1414    do {
1415      if((t = *first) < 0) { first -= t; skip += t; }
1416      else {
1417        if(skip != 0) { *(first + skip) = skip; skip = 0; }
1418        last = SA + ISA[t] + 1;
1419        if(1 < (last - first)) {
1420          budget.count = 0;
1421          tr_introsort(ISA, ISAd, SA, first, last, &budget);
1422          if(budget.count != 0) { unsorted += budget.count; }
1423          else { skip = first - last; }
1424        } else if((last - first) == 1) {
1425          skip = -1;
1426        }
1427        first = last;
1428      }
1429    } while(first < (SA + n));
1430    if(skip != 0) { *(first + skip) = skip; }
1431    if(unsorted == 0) { break; }
1432  }
1433}
1434
1435
1436/*---------------------------------------------------------------------------*/
1437
1438/* Sorts suffixes of type B*. */
1439static
1440int
1441sort_typeBstar(const unsigned char *T, int *SA,
1442               int *bucket_A, int *bucket_B,
1443               int n, int openMP) {
1444  int *PAb, *ISAb, *buf;
1445#ifdef LIBBSC_OPENMP
1446  int *curbuf;
1447  int l;
1448#endif
1449  int i, j, k, t, m, bufsize;
1450  int c0, c1;
1451#ifdef LIBBSC_OPENMP
1452  int d0, d1;
1453#endif
1454  (void)openMP;
1455
1456  /* Initialize bucket arrays. */
1457  for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
1458  for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
1459
1460  /* Count the number of occurrences of the first one or two characters of each
1461     type A, B and B* suffix. Moreover, store the beginning position of all
1462     type B* suffixes into the array SA. */
1463  for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
1464    /* type A suffix. */
1465    do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
1466    if(0 <= i) {
1467      /* type B* suffix. */
1468      ++BUCKET_BSTAR(c0, c1);
1469      SA[--m] = i;
1470      /* type B suffix. */
1471      for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
1472        ++BUCKET_B(c0, c1);
1473      }
1474    }
1475  }
1476  m = n - m;
1477/*
1478note:
1479  A type B* suffix is lexicographically smaller than a type B suffix that
1480  begins with the same first two characters.
1481*/
1482
1483  /* Calculate the index of start/end point of each bucket. */
1484  for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
1485    t = i + BUCKET_A(c0);
1486    BUCKET_A(c0) = i + j; /* start point */
1487    i = t + BUCKET_B(c0, c0);
1488    for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
1489      j += BUCKET_BSTAR(c0, c1);
1490      BUCKET_BSTAR(c0, c1) = j; /* end point */
1491      i += BUCKET_B(c0, c1);
1492    }
1493  }
1494
1495  if(0 < m) {
1496    /* Sort the type B* suffixes by their first two characters. */
1497    PAb = SA + n - m; ISAb = SA + m;
1498    for(i = m - 2; 0 <= i; --i) {
1499      t = PAb[i], c0 = T[t], c1 = T[t + 1];
1500      SA[--BUCKET_BSTAR(c0, c1)] = i;
1501    }
1502    t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
1503    SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
1504
1505    /* Sort the type B* substrings using sssort. */
1506#ifdef LIBBSC_OPENMP
1507    if (openMP)
1508    {
1509        buf = SA + m;
1510        c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
1511#pragma omp parallel default(shared) private(bufsize, curbuf, k, l, d0, d1)
1512        {
1513          bufsize = (n - (2 * m)) / omp_get_num_threads();
1514          curbuf = buf + omp_get_thread_num() * bufsize;
1515          k = 0;
1516          for(;;) {
1517            #pragma omp critical(sssort_lock)
1518            {
1519              if(0 < (l = j)) {
1520                d0 = c0, d1 = c1;
1521                do {
1522                  k = BUCKET_BSTAR(d0, d1);
1523                  if(--d1 <= d0) {
1524                    d1 = ALPHABET_SIZE - 1;
1525                    if(--d0 < 0) { break; }
1526                  }
1527                } while(((l - k) <= 1) && (0 < (l = k)));
1528                c0 = d0, c1 = d1, j = k;
1529              }
1530            }
1531            if(l == 0) { break; }
1532            sssort(T, PAb, SA + k, SA + l,
1533                   curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
1534          }
1535        }
1536    }
1537    else
1538    {
1539        buf = SA + m, bufsize = n - (2 * m);
1540        for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
1541          for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
1542            i = BUCKET_BSTAR(c0, c1);
1543            if(1 < (j - i)) {
1544              sssort(T, PAb, SA + i, SA + j,
1545                     buf, bufsize, 2, n, *(SA + i) == (m - 1));
1546            }
1547          }
1548        }
1549    }
1550#else
1551    buf = SA + m, bufsize = n - (2 * m);
1552    for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
1553      for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
1554        i = BUCKET_BSTAR(c0, c1);
1555        if(1 < (j - i)) {
1556          sssort(T, PAb, SA + i, SA + j,
1557                 buf, bufsize, 2, n, *(SA + i) == (m - 1));
1558        }
1559      }
1560    }
1561#endif
1562
1563    /* Compute ranks of type B* substrings. */
1564    for(i = m - 1; 0 <= i; --i) {
1565      if(0 <= SA[i]) {
1566        j = i;
1567        do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
1568        SA[i + 1] = i - j;
1569        if(i <= 0) { break; }
1570      }
1571      j = i;
1572      do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
1573      ISAb[SA[i]] = j;
1574    }
1575
1576    /* Construct the inverse suffix array of type B* suffixes using trsort. */
1577    trsort(ISAb, SA, m, 1);
1578
1579    /* Set the sorted order of type B* suffixes. */
1580    for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
1581      for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
1582      if(0 <= i) {
1583        t = i;
1584        for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
1585        SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
1586      }
1587    }
1588
1589    /* Calculate the index of start/end point of each bucket. */
1590    BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
1591    for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
1592      i = BUCKET_A(c0 + 1) - 1;
1593      for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
1594        t = i - BUCKET_B(c0, c1);
1595        BUCKET_B(c0, c1) = i; /* end point */
1596
1597        /* Move all type B* suffixes to the correct position. */
1598        for(i = t, j = BUCKET_BSTAR(c0, c1);
1599            j <= k;
1600            --i, --k) { SA[i] = SA[k]; }
1601      }
1602      BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
1603      BUCKET_B(c0, c0) = i; /* end point */
1604    }
1605  }
1606
1607  return m;
1608}
1609
1610/* Constructs the suffix array by using the sorted order of type B* suffixes. */
1611static
1612void
1613construct_SA(const unsigned char *T, int *SA,
1614             int *bucket_A, int *bucket_B,
1615             int n, int m) {
1616  int *i, *j, *k;
1617  int s;
1618  int c0, c1, c2;
1619
1620  if(0 < m) {
1621    /* Construct the sorted order of type B suffixes by using
1622       the sorted order of type B* suffixes. */
1623    for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
1624      /* Scan the suffix array from right to left. */
1625      for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
1626          j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
1627          i <= j;
1628          --j) {
1629        if(0 < (s = *j)) {
1630          assert(T[s] == c1);
1631          assert(((s + 1) < n) && (T[s] <= T[s + 1]));
1632          assert(T[s - 1] <= T[s]);
1633          *j = ~s;
1634          c0 = T[--s];
1635          if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1636          if(c0 != c2) {
1637            if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1638            k = SA + BUCKET_B(c2 = c0, c1);
1639          }
1640          assert(k < j); assert(k != NULL);
1641          *k-- = s;
1642        } else {
1643          assert(((s == 0) && (T[s] == c1)) || (s < 0));
1644          *j = ~s;
1645        }
1646      }
1647    }
1648  }
1649
1650  /* Construct the suffix array by using
1651     the sorted order of type B suffixes. */
1652  k = SA + BUCKET_A(c2 = T[n - 1]);
1653  *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
1654  /* Scan the suffix array from left to right. */
1655  for(i = SA, j = SA + n; i < j; ++i) {
1656    if(0 < (s = *i)) {
1657      assert(T[s - 1] >= T[s]);
1658      c0 = T[--s];
1659      if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
1660      if(c0 != c2) {
1661        BUCKET_A(c2) = k - SA;
1662        k = SA + BUCKET_A(c2 = c0);
1663      }
1664      assert(i < k);
1665      *k++ = s;
1666    } else {
1667      assert(s < 0);
1668      *i = ~s;
1669    }
1670  }
1671}
1672
1673/* Constructs the burrows-wheeler transformed string directly
1674   by using the sorted order of type B* suffixes. */
1675static
1676int
1677construct_BWT(const unsigned char *T, int *SA,
1678              int *bucket_A, int *bucket_B,
1679              int n, int m) {
1680  int *i, *j, *k, *orig;
1681  int s;
1682  int c0, c1, c2;
1683
1684  if(0 < m) {
1685    /* Construct the sorted order of type B suffixes by using
1686       the sorted order of type B* suffixes. */
1687    for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
1688      /* Scan the suffix array from right to left. */
1689      for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
1690          j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
1691          i <= j;
1692          --j) {
1693        if(0 < (s = *j)) {
1694          assert(T[s] == c1);
1695          assert(((s + 1) < n) && (T[s] <= T[s + 1]));
1696          assert(T[s - 1] <= T[s]);
1697          c0 = T[--s];
1698          *j = ~((int)c0);
1699          if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1700          if(c0 != c2) {
1701            if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1702            k = SA + BUCKET_B(c2 = c0, c1);
1703          }
1704          assert(k < j); assert(k != NULL);
1705          *k-- = s;
1706        } else if(s != 0) {
1707          *j = ~s;
1708#ifndef NDEBUG
1709        } else {
1710          assert(T[s] == c1);
1711#endif
1712        }
1713      }
1714    }
1715  }
1716
1717  /* Construct the BWTed string by using
1718     the sorted order of type B suffixes. */
1719  k = SA + BUCKET_A(c2 = T[n - 1]);
1720  *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1);
1721  /* Scan the suffix array from left to right. */
1722  for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
1723    if(0 < (s = *i)) {
1724      assert(T[s - 1] >= T[s]);
1725      c0 = T[--s];
1726      *i = c0;
1727      if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); }
1728      if(c0 != c2) {
1729        BUCKET_A(c2) = k - SA;
1730        k = SA + BUCKET_A(c2 = c0);
1731      }
1732      assert(i < k);
1733      *k++ = s;
1734    } else if(s != 0) {
1735      *i = ~s;
1736    } else {
1737      orig = i;
1738    }
1739  }
1740
1741  return orig - SA;
1742}
1743
1744/* Constructs the burrows-wheeler transformed string directly
1745   by using the sorted order of type B* suffixes. */
1746static
1747int
1748construct_BWT_indexes(const unsigned char *T, int *SA,
1749                      int *bucket_A, int *bucket_B,
1750                      int n, int m,
1751                      unsigned char * num_indexes, int * indexes) {
1752  int *i, *j, *k, *orig;
1753  int s;
1754  int c0, c1, c2;
1755
1756  int mod = n / 8;
1757  {
1758      mod |= mod >> 1;  mod |= mod >> 2;
1759      mod |= mod >> 4;  mod |= mod >> 8;
1760      mod |= mod >> 16; mod >>= 1;
1761
1762      *num_indexes = (unsigned char)((n - 1) / (mod + 1));
1763  }
1764
1765  if(0 < m) {
1766    /* Construct the sorted order of type B suffixes by using
1767       the sorted order of type B* suffixes. */
1768    for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
1769      /* Scan the suffix array from right to left. */
1770      for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
1771          j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
1772          i <= j;
1773          --j) {
1774        if(0 < (s = *j)) {
1775          assert(T[s] == c1);
1776          assert(((s + 1) < n) && (T[s] <= T[s + 1]));
1777          assert(T[s - 1] <= T[s]);
1778
1779          if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = j - SA;
1780
1781          c0 = T[--s];
1782          *j = ~((int)c0);
1783          if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1784          if(c0 != c2) {
1785            if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1786            k = SA + BUCKET_B(c2 = c0, c1);
1787          }
1788          assert(k < j); assert(k != NULL);
1789          *k-- = s;
1790        } else if(s != 0) {
1791          *j = ~s;
1792#ifndef NDEBUG
1793        } else {
1794          assert(T[s] == c1);
1795#endif
1796        }
1797      }
1798    }
1799  }
1800
1801  /* Construct the BWTed string by using
1802     the sorted order of type B suffixes. */
1803  k = SA + BUCKET_A(c2 = T[n - 1]);
1804  if (T[n - 2] < c2) {
1805    if (((n - 1) & mod) == 0) indexes[(n - 1) / (mod + 1) - 1] = k - SA;
1806    *k++ = ~((int)T[n - 2]);
1807  }
1808  else {
1809    *k++ = n - 1;
1810  }
1811
1812  /* Scan the suffix array from left to right. */
1813  for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
1814    if(0 < (s = *i)) {
1815      assert(T[s - 1] >= T[s]);
1816
1817      if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = i - SA;
1818
1819      c0 = T[--s];
1820      *i = c0;
1821      if(c0 != c2) {
1822        BUCKET_A(c2) = k - SA;
1823        k = SA + BUCKET_A(c2 = c0);
1824      }
1825      assert(i < k);
1826      if((0 < s) && (T[s - 1] < c0)) {
1827          if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = k - SA;
1828          *k++ = ~((int)T[s - 1]);
1829      } else
1830        *k++ = s;
1831    } else if(s != 0) {
1832      *i = ~s;
1833    } else {
1834      orig = i;
1835    }
1836  }
1837
1838  return orig - SA;
1839}
1840
1841
1842/*---------------------------------------------------------------------------*/
1843
1844/*- Function -*/
1845
1846int
1847divsufsort(const unsigned char *T, int *SA, int n, int openMP) {
1848  int *bucket_A, *bucket_B;
1849  int m;
1850  int err = 0;
1851
1852  /* Check arguments. */
1853  if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; }
1854  else if(n == 0) { return 0; }
1855  else if(n == 1) { SA[0] = 0; return 0; }
1856  else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; }
1857
1858  bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int));
1859  bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int));
1860
1861  /* Suffixsort. */
1862  if((bucket_A != NULL) && (bucket_B != NULL)) {
1863    m = sort_typeBstar(T, SA, bucket_A, bucket_B, n, openMP);
1864    construct_SA(T, SA, bucket_A, bucket_B, n, m);
1865  } else {
1866    err = -2;
1867  }
1868
1869  free(bucket_B);
1870  free(bucket_A);
1871
1872  return err;
1873}
1874
1875int
1876divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP) {
1877  int *B;
1878  int *bucket_A, *bucket_B;
1879  int m, pidx, i;
1880
1881  /* Check arguments. */
1882  if((T == NULL) || (U == NULL) || (n < 0)) { return -1; }
1883  else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
1884
1885  if((B = A) == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); }
1886  bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int));
1887  bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int));
1888
1889  /* Burrows-Wheeler Transform. */
1890  if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) {
1891    m = sort_typeBstar(T, B, bucket_A, bucket_B, n, openMP);
1892
1893    if (num_indexes == NULL || indexes == NULL) {
1894        pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m);
1895    } else {
1896        pidx = construct_BWT_indexes(T, B, bucket_A, bucket_B, n, m, num_indexes, indexes);
1897    }
1898
1899    /* Copy to output string. */
1900    U[0] = T[n - 1];
1901    for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; }
1902    for(i += 1; i < n; ++i) { U[i] = (unsigned char)B[i]; }
1903    pidx += 1;
1904  } else {
1905    pidx = -2;
1906  }
1907
1908  free(bucket_B);
1909  free(bucket_A);
1910  if(A == NULL) { free(B); }
1911
1912  return pidx;
1913}
1914