1/* 2Copyright 2011 Google Inc. All Rights Reserved. 3 4Licensed under the Apache License, Version 2.0 (the "License"); 5you may not use this file except in compliance with the License. 6You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10Unless required by applicable law or agreed to in writing, software 11distributed under the License is distributed on an "AS IS" BASIS, 12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13See the License for the specific language governing permissions and 14limitations under the License. 15 16Author: lode.vandevenne@gmail.com (Lode Vandevenne) 17Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) 18*/ 19 20#include "blocksplitter.h" 21 22#include <assert.h> 23#include <stdio.h> 24#include <stdlib.h> 25 26#include "deflate.h" 27#include "lz77.h" 28#include "squeeze.h" 29#include "tree.h" 30#include "util.h" 31 32/* 33The "f" for the FindMinimum function below. 34i: the current parameter of f(i) 35context: for your implementation 36*/ 37typedef double FindMinimumFun(size_t i, void* context); 38 39/* 40Finds minimum of function f(i) where is is of type size_t, f(i) is of type 41double, i is in range start-end (excluding end). 42*/ 43static size_t FindMinimum(FindMinimumFun f, void* context, 44 size_t start, size_t end) { 45 if (end - start < 1024) { 46 double best = ZOPFLI_LARGE_FLOAT; 47 size_t result = start; 48 size_t i; 49 for (i = start; i < end; i++) { 50 double v = f(i, context); 51 if (v < best) { 52 best = v; 53 result = i; 54 } 55 } 56 return result; 57 } else { 58 /* Try to find minimum faster by recursively checking multiple points. */ 59#define NUM 9 /* Good value: 9. */ 60 size_t i; 61 size_t p[NUM]; 62 double vp[NUM]; 63 size_t besti; 64 double best; 65 double lastbest = ZOPFLI_LARGE_FLOAT; 66 size_t pos = start; 67 68 for (;;) { 69 if (end - start <= NUM) break; 70 71 for (i = 0; i < NUM; i++) { 72 p[i] = start + (i + 1) * ((end - start) / (NUM + 1)); 73 vp[i] = f(p[i], context); 74 } 75 besti = 0; 76 best = vp[0]; 77 for (i = 1; i < NUM; i++) { 78 if (vp[i] < best) { 79 best = vp[i]; 80 besti = i; 81 } 82 } 83 if (best > lastbest) break; 84 85 start = besti == 0 ? start : p[besti - 1]; 86 end = besti == NUM - 1 ? end : p[besti + 1]; 87 88 pos = p[besti]; 89 lastbest = best; 90 } 91 return pos; 92#undef NUM 93 } 94} 95 96/* 97Returns estimated cost of a block in bits. It includes the size to encode the 98tree and the size to encode all literal, length and distance symbols and their 99extra bits. 100 101litlens: lz77 lit/lengths 102dists: ll77 distances 103lstart: start of block 104lend: end of block (not inclusive) 105*/ 106static double EstimateCost(const unsigned short* litlens, 107 const unsigned short* dists, 108 size_t lstart, size_t lend) { 109 return ZopfliCalculateBlockSize(litlens, dists, lstart, lend, 2); 110} 111 112typedef struct SplitCostContext { 113 const unsigned short* litlens; 114 const unsigned short* dists; 115 size_t llsize; 116 size_t start; 117 size_t end; 118} SplitCostContext; 119 120 121/* 122Gets the cost which is the sum of the cost of the left and the right section 123of the data. 124type: FindMinimumFun 125*/ 126static double SplitCost(size_t i, void* context) { 127 SplitCostContext* c = (SplitCostContext*)context; 128 return EstimateCost(c->litlens, c->dists, c->start, i) + 129 EstimateCost(c->litlens, c->dists, i, c->end); 130} 131 132static void AddSorted(size_t value, size_t** out, size_t* outsize) { 133 size_t i; 134 ZOPFLI_APPEND_DATA(value, out, outsize); 135 if (*outsize > 0) { 136 for (i = 0; i < *outsize - 1; i++) { 137 if ((*out)[i] > value) { 138 size_t j; 139 for (j = *outsize - 1; j > i; j--) { 140 (*out)[j] = (*out)[j - 1]; 141 } 142 (*out)[i] = value; 143 break; 144 } 145 } 146 } 147} 148 149/* 150Prints the block split points as decimal and hex values in the terminal. 151*/ 152static void PrintBlockSplitPoints(const unsigned short* litlens, 153 const unsigned short* dists, 154 size_t llsize, const size_t* lz77splitpoints, 155 size_t nlz77points) { 156 size_t* splitpoints = 0; 157 size_t npoints = 0; 158 size_t i; 159 /* The input is given as lz77 indices, but we want to see the uncompressed 160 index values. */ 161 size_t pos = 0; 162 if (nlz77points > 0) { 163 for (i = 0; i < llsize; i++) { 164 size_t length = dists[i] == 0 ? 1 : litlens[i]; 165 if (lz77splitpoints[npoints] == i) { 166 ZOPFLI_APPEND_DATA(pos, &splitpoints, &npoints); 167 if (npoints == nlz77points) break; 168 } 169 pos += length; 170 } 171 } 172 assert(npoints == nlz77points); 173 174 fprintf(stderr, "block split points: "); 175 for (i = 0; i < npoints; i++) { 176 fprintf(stderr, "%d ", (int)splitpoints[i]); 177 } 178 fprintf(stderr, "(hex:"); 179 for (i = 0; i < npoints; i++) { 180 fprintf(stderr, " %x", (int)splitpoints[i]); 181 } 182 fprintf(stderr, ")\n"); 183 184 free(splitpoints); 185} 186 187/* 188Finds next block to try to split, the largest of the available ones. 189The largest is chosen to make sure that if only a limited amount of blocks is 190requested, their sizes are spread evenly. 191llsize: the size of the LL77 data, which is the size of the done array here. 192done: array indicating which blocks starting at that position are no longer 193 splittable (splitting them increases rather than decreases cost). 194splitpoints: the splitpoints found so far. 195npoints: the amount of splitpoints found so far. 196lstart: output variable, giving start of block. 197lend: output variable, giving end of block. 198returns 1 if a block was found, 0 if no block found (all are done). 199*/ 200static int FindLargestSplittableBlock( 201 size_t llsize, const unsigned char* done, 202 const size_t* splitpoints, size_t npoints, 203 size_t* lstart, size_t* lend) { 204 size_t longest = 0; 205 int found = 0; 206 size_t i; 207 for (i = 0; i <= npoints; i++) { 208 size_t start = i == 0 ? 0 : splitpoints[i - 1]; 209 size_t end = i == npoints ? llsize - 1 : splitpoints[i]; 210 if (!done[start] && end - start > longest) { 211 *lstart = start; 212 *lend = end; 213 found = 1; 214 longest = end - start; 215 } 216 } 217 return found; 218} 219 220void ZopfliBlockSplitLZ77(const ZopfliOptions* options, 221 const unsigned short* litlens, 222 const unsigned short* dists, 223 size_t llsize, size_t maxblocks, 224 size_t** splitpoints, size_t* npoints) { 225 size_t lstart, lend; 226 size_t i; 227 size_t llpos = 0; 228 size_t numblocks = 1; 229 unsigned char* done; 230 double splitcost, origcost; 231 232 if (llsize < 10) return; /* This code fails on tiny files. */ 233 234 done = (unsigned char*)malloc(llsize); 235 if (!done) exit(-1); /* Allocation failed. */ 236 for (i = 0; i < llsize; i++) done[i] = 0; 237 238 lstart = 0; 239 lend = llsize; 240 for (;;) { 241 SplitCostContext c; 242 243 if (maxblocks > 0 && numblocks >= maxblocks) { 244 break; 245 } 246 247 c.litlens = litlens; 248 c.dists = dists; 249 c.llsize = llsize; 250 c.start = lstart; 251 c.end = lend; 252 assert(lstart < lend); 253 llpos = FindMinimum(SplitCost, &c, lstart + 1, lend); 254 255 assert(llpos > lstart); 256 assert(llpos < lend); 257 258 splitcost = EstimateCost(litlens, dists, lstart, llpos) + 259 EstimateCost(litlens, dists, llpos, lend); 260 origcost = EstimateCost(litlens, dists, lstart, lend); 261 262 if (splitcost > origcost || llpos == lstart + 1 || llpos == lend) { 263 done[lstart] = 1; 264 } else { 265 AddSorted(llpos, splitpoints, npoints); 266 numblocks++; 267 } 268 269 if (!FindLargestSplittableBlock( 270 llsize, done, *splitpoints, *npoints, &lstart, &lend)) { 271 break; /* No further split will probably reduce compression. */ 272 } 273 274 if (lend - lstart < 10) { 275 break; 276 } 277 } 278 279 if (options->verbose) { 280 PrintBlockSplitPoints(litlens, dists, llsize, *splitpoints, *npoints); 281 } 282 283 free(done); 284} 285 286void ZopfliBlockSplit(const ZopfliOptions* options, 287 const unsigned char* in, size_t instart, size_t inend, 288 size_t maxblocks, size_t** splitpoints, size_t* npoints) { 289 size_t pos = 0; 290 size_t i; 291 ZopfliBlockState s; 292 size_t* lz77splitpoints = 0; 293 size_t nlz77points = 0; 294 ZopfliLZ77Store store; 295 296 ZopfliInitLZ77Store(&store); 297 298 s.options = options; 299 s.blockstart = instart; 300 s.blockend = inend; 301#ifdef ZOPFLI_LONGEST_MATCH_CACHE 302 s.lmc = 0; 303#endif 304 305 *npoints = 0; 306 *splitpoints = 0; 307 308 /* Unintuitively, Using a simple LZ77 method here instead of ZopfliLZ77Optimal 309 results in better blocks. */ 310 ZopfliLZ77Greedy(&s, in, instart, inend, &store); 311 312 ZopfliBlockSplitLZ77(options, 313 store.litlens, store.dists, store.size, maxblocks, 314 &lz77splitpoints, &nlz77points); 315 316 /* Convert LZ77 positions to positions in the uncompressed input. */ 317 pos = instart; 318 if (nlz77points > 0) { 319 for (i = 0; i < store.size; i++) { 320 size_t length = store.dists[i] == 0 ? 1 : store.litlens[i]; 321 if (lz77splitpoints[*npoints] == i) { 322 ZOPFLI_APPEND_DATA(pos, splitpoints, npoints); 323 if (*npoints == nlz77points) break; 324 } 325 pos += length; 326 } 327 } 328 assert(*npoints == nlz77points); 329 330 free(lz77splitpoints); 331 ZopfliCleanLZ77Store(&store); 332} 333 334void ZopfliBlockSplitSimple(const unsigned char* in, 335 size_t instart, size_t inend, 336 size_t blocksize, 337 size_t** splitpoints, size_t* npoints) { 338 size_t i = instart; 339 while (i < inend) { 340 ZOPFLI_APPEND_DATA(i, splitpoints, npoints); 341 i += blocksize; 342 } 343 (void)in; 344} 345