1/*
2Copyright 2011 Google Inc. All Rights Reserved.
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8    http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15
16Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18*/
19
20#include "blocksplitter.h"
21
22#include <assert.h>
23#include <stdio.h>
24#include <stdlib.h>
25
26#include "deflate.h"
27#include "lz77.h"
28#include "squeeze.h"
29#include "tree.h"
30#include "util.h"
31
32/*
33The "f" for the FindMinimum function below.
34i: the current parameter of f(i)
35context: for your implementation
36*/
37typedef double FindMinimumFun(size_t i, void* context);
38
39/*
40Finds minimum of function f(i) where is is of type size_t, f(i) is of type
41double, i is in range start-end (excluding end).
42*/
43static size_t FindMinimum(FindMinimumFun f, void* context,
44                          size_t start, size_t end) {
45  if (end - start < 1024) {
46    double best = ZOPFLI_LARGE_FLOAT;
47    size_t result = start;
48    size_t i;
49    for (i = start; i < end; i++) {
50      double v = f(i, context);
51      if (v < best) {
52        best = v;
53        result = i;
54      }
55    }
56    return result;
57  } else {
58    /* Try to find minimum faster by recursively checking multiple points. */
59#define NUM 9  /* Good value: 9. */
60    size_t i;
61    size_t p[NUM];
62    double vp[NUM];
63    size_t besti;
64    double best;
65    double lastbest = ZOPFLI_LARGE_FLOAT;
66    size_t pos = start;
67
68    for (;;) {
69      if (end - start <= NUM) break;
70
71      for (i = 0; i < NUM; i++) {
72        p[i] = start + (i + 1) * ((end - start) / (NUM + 1));
73        vp[i] = f(p[i], context);
74      }
75      besti = 0;
76      best = vp[0];
77      for (i = 1; i < NUM; i++) {
78        if (vp[i] < best) {
79          best = vp[i];
80          besti = i;
81        }
82      }
83      if (best > lastbest) break;
84
85      start = besti == 0 ? start : p[besti - 1];
86      end = besti == NUM - 1 ? end : p[besti + 1];
87
88      pos = p[besti];
89      lastbest = best;
90    }
91    return pos;
92#undef NUM
93  }
94}
95
96/*
97Returns estimated cost of a block in bits.  It includes the size to encode the
98tree and the size to encode all literal, length and distance symbols and their
99extra bits.
100
101litlens: lz77 lit/lengths
102dists: ll77 distances
103lstart: start of block
104lend: end of block (not inclusive)
105*/
106static double EstimateCost(const unsigned short* litlens,
107                           const unsigned short* dists,
108                           size_t lstart, size_t lend) {
109  return ZopfliCalculateBlockSize(litlens, dists, lstart, lend, 2);
110}
111
112typedef struct SplitCostContext {
113  const unsigned short* litlens;
114  const unsigned short* dists;
115  size_t llsize;
116  size_t start;
117  size_t end;
118} SplitCostContext;
119
120
121/*
122Gets the cost which is the sum of the cost of the left and the right section
123of the data.
124type: FindMinimumFun
125*/
126static double SplitCost(size_t i, void* context) {
127  SplitCostContext* c = (SplitCostContext*)context;
128  return EstimateCost(c->litlens, c->dists, c->start, i) +
129      EstimateCost(c->litlens, c->dists, i, c->end);
130}
131
132static void AddSorted(size_t value, size_t** out, size_t* outsize) {
133  size_t i;
134  ZOPFLI_APPEND_DATA(value, out, outsize);
135  if (*outsize > 0) {
136    for (i = 0; i < *outsize - 1; i++) {
137      if ((*out)[i] > value) {
138        size_t j;
139        for (j = *outsize - 1; j > i; j--) {
140          (*out)[j] = (*out)[j - 1];
141        }
142        (*out)[i] = value;
143        break;
144      }
145    }
146  }
147}
148
149/*
150Prints the block split points as decimal and hex values in the terminal.
151*/
152static void PrintBlockSplitPoints(const unsigned short* litlens,
153                                  const unsigned short* dists,
154                                  size_t llsize, const size_t* lz77splitpoints,
155                                  size_t nlz77points) {
156  size_t* splitpoints = 0;
157  size_t npoints = 0;
158  size_t i;
159  /* The input is given as lz77 indices, but we want to see the uncompressed
160  index values. */
161  size_t pos = 0;
162  if (nlz77points > 0) {
163    for (i = 0; i < llsize; i++) {
164      size_t length = dists[i] == 0 ? 1 : litlens[i];
165      if (lz77splitpoints[npoints] == i) {
166        ZOPFLI_APPEND_DATA(pos, &splitpoints, &npoints);
167        if (npoints == nlz77points) break;
168      }
169      pos += length;
170    }
171  }
172  assert(npoints == nlz77points);
173
174  fprintf(stderr, "block split points: ");
175  for (i = 0; i < npoints; i++) {
176    fprintf(stderr, "%d ", (int)splitpoints[i]);
177  }
178  fprintf(stderr, "(hex:");
179  for (i = 0; i < npoints; i++) {
180    fprintf(stderr, " %x", (int)splitpoints[i]);
181  }
182  fprintf(stderr, ")\n");
183
184  free(splitpoints);
185}
186
187/*
188Finds next block to try to split, the largest of the available ones.
189The largest is chosen to make sure that if only a limited amount of blocks is
190requested, their sizes are spread evenly.
191llsize: the size of the LL77 data, which is the size of the done array here.
192done: array indicating which blocks starting at that position are no longer
193    splittable (splitting them increases rather than decreases cost).
194splitpoints: the splitpoints found so far.
195npoints: the amount of splitpoints found so far.
196lstart: output variable, giving start of block.
197lend: output variable, giving end of block.
198returns 1 if a block was found, 0 if no block found (all are done).
199*/
200static int FindLargestSplittableBlock(
201    size_t llsize, const unsigned char* done,
202    const size_t* splitpoints, size_t npoints,
203    size_t* lstart, size_t* lend) {
204  size_t longest = 0;
205  int found = 0;
206  size_t i;
207  for (i = 0; i <= npoints; i++) {
208    size_t start = i == 0 ? 0 : splitpoints[i - 1];
209    size_t end = i == npoints ? llsize - 1 : splitpoints[i];
210    if (!done[start] && end - start > longest) {
211      *lstart = start;
212      *lend = end;
213      found = 1;
214      longest = end - start;
215    }
216  }
217  return found;
218}
219
220void ZopfliBlockSplitLZ77(const ZopfliOptions* options,
221                          const unsigned short* litlens,
222                          const unsigned short* dists,
223                          size_t llsize, size_t maxblocks,
224                          size_t** splitpoints, size_t* npoints) {
225  size_t lstart, lend;
226  size_t i;
227  size_t llpos = 0;
228  size_t numblocks = 1;
229  unsigned char* done;
230  double splitcost, origcost;
231
232  if (llsize < 10) return;  /* This code fails on tiny files. */
233
234  done = (unsigned char*)malloc(llsize);
235  if (!done) exit(-1); /* Allocation failed. */
236  for (i = 0; i < llsize; i++) done[i] = 0;
237
238  lstart = 0;
239  lend = llsize;
240  for (;;) {
241    SplitCostContext c;
242
243    if (maxblocks > 0 && numblocks >= maxblocks) {
244      break;
245    }
246
247    c.litlens = litlens;
248    c.dists = dists;
249    c.llsize = llsize;
250    c.start = lstart;
251    c.end = lend;
252    assert(lstart < lend);
253    llpos = FindMinimum(SplitCost, &c, lstart + 1, lend);
254
255    assert(llpos > lstart);
256    assert(llpos < lend);
257
258    splitcost = EstimateCost(litlens, dists, lstart, llpos) +
259        EstimateCost(litlens, dists, llpos, lend);
260    origcost = EstimateCost(litlens, dists, lstart, lend);
261
262    if (splitcost > origcost || llpos == lstart + 1 || llpos == lend) {
263      done[lstart] = 1;
264    } else {
265      AddSorted(llpos, splitpoints, npoints);
266      numblocks++;
267    }
268
269    if (!FindLargestSplittableBlock(
270        llsize, done, *splitpoints, *npoints, &lstart, &lend)) {
271      break;  /* No further split will probably reduce compression. */
272    }
273
274    if (lend - lstart < 10) {
275      break;
276    }
277  }
278
279  if (options->verbose) {
280    PrintBlockSplitPoints(litlens, dists, llsize, *splitpoints, *npoints);
281  }
282
283  free(done);
284}
285
286void ZopfliBlockSplit(const ZopfliOptions* options,
287                      const unsigned char* in, size_t instart, size_t inend,
288                      size_t maxblocks, size_t** splitpoints, size_t* npoints) {
289  size_t pos = 0;
290  size_t i;
291  ZopfliBlockState s;
292  size_t* lz77splitpoints = 0;
293  size_t nlz77points = 0;
294  ZopfliLZ77Store store;
295
296  ZopfliInitLZ77Store(&store);
297
298  s.options = options;
299  s.blockstart = instart;
300  s.blockend = inend;
301#ifdef ZOPFLI_LONGEST_MATCH_CACHE
302  s.lmc = 0;
303#endif
304
305  *npoints = 0;
306  *splitpoints = 0;
307
308  /* Unintuitively, Using a simple LZ77 method here instead of ZopfliLZ77Optimal
309  results in better blocks. */
310  ZopfliLZ77Greedy(&s, in, instart, inend, &store);
311
312  ZopfliBlockSplitLZ77(options,
313                       store.litlens, store.dists, store.size, maxblocks,
314                       &lz77splitpoints, &nlz77points);
315
316  /* Convert LZ77 positions to positions in the uncompressed input. */
317  pos = instart;
318  if (nlz77points > 0) {
319    for (i = 0; i < store.size; i++) {
320      size_t length = store.dists[i] == 0 ? 1 : store.litlens[i];
321      if (lz77splitpoints[*npoints] == i) {
322        ZOPFLI_APPEND_DATA(pos, splitpoints, npoints);
323        if (*npoints == nlz77points) break;
324      }
325      pos += length;
326    }
327  }
328  assert(*npoints == nlz77points);
329
330  free(lz77splitpoints);
331  ZopfliCleanLZ77Store(&store);
332}
333
334void ZopfliBlockSplitSimple(const unsigned char* in,
335                            size_t instart, size_t inend,
336                            size_t blocksize,
337                            size_t** splitpoints, size_t* npoints) {
338  size_t i = instart;
339  while (i < inend) {
340    ZOPFLI_APPEND_DATA(i, splitpoints, npoints);
341    i += blocksize;
342  }
343  (void)in;
344}
345