svndiff.c revision 289166
1/*
2 * svndiff.c -- Encoding and decoding svndiff-format deltas.
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
25#include <assert.h>
26#include <string.h>
27#include "svn_delta.h"
28#include "svn_io.h"
29#include "delta.h"
30#include "svn_pools.h"
31#include "svn_private_config.h"
32#include <zlib.h>
33
34#include "private/svn_error_private.h"
35#include "private/svn_delta_private.h"
36
/* zlib only exports compressBound() starting with version 1.2.0.  When
   building against an older zlib, use the arithmetic bound below. */
#if ZLIB_VERNUM < 0x1200
#define svnCompressBound(LEN) ((LEN) + ((LEN) >> 12) + ((LEN) >> 14) + 11)
#else
#define svnCompressBound(LEN) compressBound(LEN)
#endif

/* Threshold for svndiff1 secondary compression: instruction or new-data
   sections shorter than this many bytes are stored without running
   them through zlib.  */
#define MIN_COMPRESS_SIZE 512
47
48/* ----- Text delta to svndiff ----- */
49
50/* We make one of these and get it passed back to us in calls to the
51   window handler.  We only use it to record the write function and
52   baton passed to svn_txdelta_to_svndiff3().  */
53struct encoder_baton {
54  svn_stream_t *output;
55  svn_boolean_t header_done;
56  int version;
57  int compression_level;
58  apr_pool_t *pool;
59};
60
/* Upper bound on the number of bytes encode_int() produces for a single
   value, and therefore a safe size for any buffer it writes into.
   Integers are assumed to be at most 64 bits wide; with 7 payload bits
   per encoded byte, 10 bytes are always enough. */
#define MAX_ENCODED_INT_LEN 10

/* Upper bound on the encoded size of a single instruction. */
#define MAX_INSTRUCTION_LEN (2 * MAX_ENCODED_INT_LEN + 1)

/* Upper bound on the size of an instructions section: in theory a window
   could consist of SVN_DELTA_WINDOW_SIZE 1-byte copy-from-source
   instructions (though this is very unlikely). */
#define MAX_INSTRUCTION_SECTION_LEN \
  (SVN_DELTA_WINDOW_SIZE * MAX_INSTRUCTION_LEN)
73
74/* Encode VAL into the buffer P using the variable-length svndiff
75   integer format.  Return the incremented value of P after the
76   encoded bytes have been written.  P must point to a buffer of size
77   at least MAX_ENCODED_INT_LEN.
78
79   This encoding uses the high bit of each byte as a continuation bit
80   and the other seven bits as data bits.  High-order data bits are
81   encoded first, followed by lower-order bits, so the value can be
82   reconstructed by concatenating the data bits from left to right and
83   interpreting the result as a binary number.  Examples (brackets
84   denote byte boundaries, spaces are for clarity only):
85
86           1 encodes as [0 0000001]
87          33 encodes as [0 0100001]
88         129 encodes as [1 0000001] [0 0000001]
89        2000 encodes as [1 0001111] [0 1010000]
90*/
91static unsigned char *
92encode_int(unsigned char *p, svn_filesize_t val)
93{
94  int n;
95  svn_filesize_t v;
96  unsigned char cont;
97
98  SVN_ERR_ASSERT_NO_RETURN(val >= 0);
99
100  /* Figure out how many bytes we'll need.  */
101  v = val >> 7;
102  n = 1;
103  while (v > 0)
104    {
105      v = v >> 7;
106      n++;
107    }
108
109  SVN_ERR_ASSERT_NO_RETURN(n <= MAX_ENCODED_INT_LEN);
110
111  /* Encode the remaining bytes; n is always the number of bytes
112     coming after the one we're encoding.  */
113  while (--n >= 0)
114    {
115      cont = ((n > 0) ? 0x1 : 0x0) << 7;
116      *p++ = (unsigned char)(((val >> (n * 7)) & 0x7f) | cont);
117    }
118
119  return p;
120}
121
122
123/* Append an encoded integer to a string.  */
124static void
125append_encoded_int(svn_stringbuf_t *header, svn_filesize_t val)
126{
127  unsigned char buf[MAX_ENCODED_INT_LEN], *p;
128
129  p = encode_int(buf, val);
130  svn_stringbuf_appendbytes(header, (const char *)buf, p - buf);
131}
132
133/* If IN is a string that is >= MIN_COMPRESS_SIZE and the COMPRESSION_LEVEL
134   is not SVN_DELTA_COMPRESSION_LEVEL_NONE, zlib compress it and places the
135   result in OUT, with an integer prepended specifying the original size.
136   If IN is < MIN_COMPRESS_SIZE, or if the compressed version of IN was no
137   smaller than the original IN, OUT will be a copy of IN with the size
138   prepended as an integer. */
139static svn_error_t *
140zlib_encode(const char *data,
141            apr_size_t len,
142            svn_stringbuf_t *out,
143            int compression_level)
144{
145  unsigned long endlen;
146  apr_size_t intlen;
147
148  svn_stringbuf_setempty(out);
149  append_encoded_int(out, len);
150  intlen = out->len;
151
152  /* Compression initialization overhead is considered to large for
153     short buffers.  Also, if we don't actually want to compress data,
154     ZLIB will produce an output no shorter than the input.  Hence,
155     the DATA would directly appended to OUT, so we can do that directly
156     without calling ZLIB before. */
157  if (   (len < MIN_COMPRESS_SIZE)
158      || (compression_level == SVN_DELTA_COMPRESSION_LEVEL_NONE))
159    {
160      svn_stringbuf_appendbytes(out, data, len);
161    }
162  else
163    {
164      int zerr;
165
166      svn_stringbuf_ensure(out, svnCompressBound(len) + intlen);
167      endlen = out->blocksize;
168
169      zerr = compress2((unsigned char *)out->data + intlen, &endlen,
170                       (const unsigned char *)data, len,
171                       compression_level);
172      if (zerr != Z_OK)
173        return svn_error_trace(svn_error__wrap_zlib(
174                                 zerr, "compress2",
175                                 _("Compression of svndiff data failed")));
176
177      /* Compression didn't help :(, just append the original text */
178      if (endlen >= len)
179        {
180          svn_stringbuf_appendbytes(out, data, len);
181          return SVN_NO_ERROR;
182        }
183      out->len = endlen + intlen;
184      out->data[out->len] = 0;
185    }
186  return SVN_NO_ERROR;
187}
188
189static svn_error_t *
190send_simple_insertion_window(svn_txdelta_window_t *window,
191                             struct encoder_baton *eb)
192{
193  unsigned char headers[4 + 5 * MAX_ENCODED_INT_LEN + MAX_INSTRUCTION_LEN];
194  unsigned char ibuf[MAX_INSTRUCTION_LEN];
195  unsigned char *header_current;
196  apr_size_t header_len;
197  apr_size_t ip_len, i;
198  apr_size_t len = window->new_data->len;
199
200  /* there is only one target copy op. It must span the whole window */
201  assert(window->ops[0].action_code == svn_txdelta_new);
202  assert(window->ops[0].length == window->tview_len);
203  assert(window->ops[0].offset == 0);
204
205  /* write stream header if necessary */
206  if (!eb->header_done)
207    {
208      eb->header_done = TRUE;
209      headers[0] = 'S';
210      headers[1] = 'V';
211      headers[2] = 'N';
212      headers[3] = (unsigned char)eb->version;
213      header_current = headers + 4;
214    }
215  else
216    {
217      header_current = headers;
218    }
219
220  /* Encode the action code and length.  */
221  if (window->tview_len >> 6 == 0)
222    {
223      ibuf[0] = (unsigned char)(window->tview_len + (0x2 << 6));
224      ip_len = 1;
225    }
226  else
227    {
228      ibuf[0] = (0x2 << 6);
229      ip_len = encode_int(ibuf + 1, window->tview_len) - ibuf;
230    }
231
232  /* encode the window header.  Please note that the source window may
233   * have content despite not being used for deltification. */
234  header_current = encode_int(header_current, window->sview_offset);
235  header_current = encode_int(header_current, window->sview_len);
236  header_current = encode_int(header_current, window->tview_len);
237  header_current[0] = (unsigned char)ip_len;  /* 1 instruction */
238  header_current = encode_int(&header_current[1], len);
239
240  /* append instructions (1 to a handful of bytes) */
241  for (i = 0; i < ip_len; ++i)
242    header_current[i] = ibuf[i];
243
244  header_len = header_current - headers + ip_len;
245
246  /* Write out the window.  */
247  SVN_ERR(svn_stream_write(eb->output, (const char *)headers, &header_len));
248  if (len)
249    SVN_ERR(svn_stream_write(eb->output, window->new_data->data, &len));
250
251  return SVN_NO_ERROR;
252}
253
254static svn_error_t *
255window_handler(svn_txdelta_window_t *window, void *baton)
256{
257  struct encoder_baton *eb = baton;
258  apr_pool_t *pool;
259  svn_stringbuf_t *instructions;
260  svn_stringbuf_t *i1;
261  svn_stringbuf_t *header;
262  const svn_string_t *newdata;
263  unsigned char ibuf[MAX_INSTRUCTION_LEN], *ip;
264  const svn_txdelta_op_t *op;
265  apr_size_t len;
266
267  /* use specialized code if there is no source */
268  if (window && !window->src_ops && window->num_ops == 1 && !eb->version)
269    return svn_error_trace(send_simple_insertion_window(window, eb));
270
271  /* Make sure we write the header.  */
272  if (!eb->header_done)
273    {
274      char svnver[4] = {'S','V','N','\0'};
275      len = 4;
276      svnver[3] = (char)eb->version;
277      SVN_ERR(svn_stream_write(eb->output, svnver, &len));
278      eb->header_done = TRUE;
279    }
280
281  if (window == NULL)
282    {
283      svn_stream_t *output = eb->output;
284
285      /* We're done; clean up.
286
287         We clean our pool first. Given that the output stream was passed
288         TO us, we'll assume it has a longer lifetime, and that it will not
289         be affected by our pool destruction.
290
291         The contrary point of view (close the stream first): that could
292         tell our user that everything related to the output stream is done,
293         and a cleanup of the user pool should occur. However, that user
294         pool could include the subpool we created for our work (eb->pool),
295         which would then make our call to svn_pool_destroy() puke.
296       */
297      svn_pool_destroy(eb->pool);
298
299      return svn_stream_close(output);
300    }
301
302  /* create the necessary data buffers */
303  pool = svn_pool_create(eb->pool);
304  instructions = svn_stringbuf_create_empty(pool);
305  i1 = svn_stringbuf_create_empty(pool);
306  header = svn_stringbuf_create_empty(pool);
307
308  /* Encode the instructions.  */
309  for (op = window->ops; op < window->ops + window->num_ops; op++)
310    {
311      /* Encode the action code and length.  */
312      ip = ibuf;
313      switch (op->action_code)
314        {
315        case svn_txdelta_source: *ip = 0; break;
316        case svn_txdelta_target: *ip = (0x1 << 6); break;
317        case svn_txdelta_new:    *ip = (0x2 << 6); break;
318        }
319      if (op->length >> 6 == 0)
320        *ip++ |= (unsigned char)op->length;
321      else
322        ip = encode_int(ip + 1, op->length);
323      if (op->action_code != svn_txdelta_new)
324        ip = encode_int(ip, op->offset);
325      svn_stringbuf_appendbytes(instructions, (const char *)ibuf, ip - ibuf);
326    }
327
328  /* Encode the header.  */
329  append_encoded_int(header, window->sview_offset);
330  append_encoded_int(header, window->sview_len);
331  append_encoded_int(header, window->tview_len);
332  if (eb->version == 1)
333    {
334      SVN_ERR(zlib_encode(instructions->data, instructions->len,
335                          i1, eb->compression_level));
336      instructions = i1;
337    }
338  append_encoded_int(header, instructions->len);
339  if (eb->version == 1)
340    {
341      svn_stringbuf_t *temp = svn_stringbuf_create_empty(pool);
342      svn_string_t *tempstr = svn_string_create_empty(pool);
343      SVN_ERR(zlib_encode(window->new_data->data, window->new_data->len,
344                          temp, eb->compression_level));
345      tempstr->data = temp->data;
346      tempstr->len = temp->len;
347      newdata = tempstr;
348    }
349  else
350    newdata = window->new_data;
351
352  append_encoded_int(header, newdata->len);
353
354  /* Write out the window.  */
355  len = header->len;
356  SVN_ERR(svn_stream_write(eb->output, header->data, &len));
357  if (instructions->len > 0)
358    {
359      len = instructions->len;
360      SVN_ERR(svn_stream_write(eb->output, instructions->data, &len));
361    }
362  if (newdata->len > 0)
363    {
364      len = newdata->len;
365      SVN_ERR(svn_stream_write(eb->output, newdata->data, &len));
366    }
367
368  svn_pool_destroy(pool);
369  return SVN_NO_ERROR;
370}
371
372void
373svn_txdelta_to_svndiff3(svn_txdelta_window_handler_t *handler,
374                        void **handler_baton,
375                        svn_stream_t *output,
376                        int svndiff_version,
377                        int compression_level,
378                        apr_pool_t *pool)
379{
380  apr_pool_t *subpool = svn_pool_create(pool);
381  struct encoder_baton *eb;
382
383  eb = apr_palloc(subpool, sizeof(*eb));
384  eb->output = output;
385  eb->header_done = FALSE;
386  eb->pool = subpool;
387  eb->version = svndiff_version;
388  eb->compression_level = compression_level;
389
390  *handler = window_handler;
391  *handler_baton = eb;
392}
393
394void
395svn_txdelta_to_svndiff2(svn_txdelta_window_handler_t *handler,
396                        void **handler_baton,
397                        svn_stream_t *output,
398                        int svndiff_version,
399                        apr_pool_t *pool)
400{
401  svn_txdelta_to_svndiff3(handler, handler_baton, output, svndiff_version,
402                          SVN_DELTA_COMPRESSION_LEVEL_DEFAULT, pool);
403}
404
405void
406svn_txdelta_to_svndiff(svn_stream_t *output,
407                       apr_pool_t *pool,
408                       svn_txdelta_window_handler_t *handler,
409                       void **handler_baton)
410{
411  svn_txdelta_to_svndiff3(handler, handler_baton, output, 0,
412                          SVN_DELTA_COMPRESSION_LEVEL_DEFAULT, pool);
413}
414
415
416/* ----- svndiff to text delta ----- */
417
418/* An svndiff parser object.  */
419struct decode_baton
420{
421  /* Once the svndiff parser has enough data buffered to create a
422     "window", it passes this window to the caller's consumer routine.  */
423  svn_txdelta_window_handler_t consumer_func;
424  void *consumer_baton;
425
426  /* Pool to create subpools from; each developing window will be a
427     subpool.  */
428  apr_pool_t *pool;
429
430  /* The current subpool which contains our current window-buffer.  */
431  apr_pool_t *subpool;
432
433  /* The actual svndiff data buffer, living within subpool.  */
434  svn_stringbuf_t *buffer;
435
436  /* The offset and size of the last source view, so that we can check
437     to make sure the next one isn't sliding backwards.  */
438  svn_filesize_t last_sview_offset;
439  apr_size_t last_sview_len;
440
441  /* We have to discard four bytes at the beginning for the header.
442     This field keeps track of how many of those bytes we have read.  */
443  apr_size_t header_bytes;
444
445  /* Do we want an error to occur when we close the stream that
446     indicates we didn't send the whole svndiff data?  If you plan to
447     not transmit the whole svndiff data stream, you will want this to
448     be FALSE. */
449  svn_boolean_t error_on_early_close;
450
451  /* svndiff version in use by delta.  */
452  unsigned char version;
453};
454
455
456/* Decode an svndiff-encoded integer into *VAL and return a pointer to
457   the byte after the integer.  The bytes to be decoded live in the
458   range [P..END-1].  If these bytes do not contain a whole encoded
459   integer, return NULL; in this case *VAL is undefined.
460
461   See the comment for encode_int() earlier in this file for more detail on
462   the encoding format.  */
463static const unsigned char *
464decode_file_offset(svn_filesize_t *val,
465                   const unsigned char *p,
466                   const unsigned char *end)
467{
468  svn_filesize_t temp = 0;
469
470  if (p + MAX_ENCODED_INT_LEN < end)
471    end = p + MAX_ENCODED_INT_LEN;
472  /* Decode bytes until we're done.  */
473  while (p < end)
474    {
475      /* Don't use svn_filesize_t here, because this might be 64 bits
476       * on 32 bit targets. Optimizing compilers may or may not be
477       * able to reduce that to the effective code below. */
478      unsigned int c = *p++;
479
480      temp = (temp << 7) | (c & 0x7f);
481      if (c < 0x80)
482      {
483        *val = temp;
484        return p;
485      }
486    }
487
488  return NULL;
489}
490
491
492/* Same as above, only decode into a size variable. */
493static const unsigned char *
494decode_size(apr_size_t *val,
495            const unsigned char *p,
496            const unsigned char *end)
497{
498  apr_size_t temp = 0;
499
500  if (p + MAX_ENCODED_INT_LEN < end)
501    end = p + MAX_ENCODED_INT_LEN;
502  /* Decode bytes until we're done.  */
503  while (p < end)
504    {
505      apr_size_t c = *p++;
506
507      temp = (temp << 7) | (c & 0x7f);
508      if (c < 0x80)
509      {
510        *val = temp;
511        return p;
512      }
513    }
514
515  return NULL;
516}
517
518/* Decode the possibly-zlib compressed string of length INLEN that is in
519   IN, into OUT.  We expect an integer is prepended to IN that specifies
520   the original size, and that if encoded size == original size, that the
521   remaining data is not compressed.
522   In that case, we will simply return pointer into IN as data pointer for
523   OUT, COPYLESS_ALLOWED has been set.  The, the caller is expected not to
524   modify the contents of OUT.
525   An error is returned if the decoded length exceeds the given LIMIT.
526 */
527static svn_error_t *
528zlib_decode(const unsigned char *in, apr_size_t inLen, svn_stringbuf_t *out,
529            apr_size_t limit)
530{
531  apr_size_t len;
532  const unsigned char *oldplace = in;
533
534  /* First thing in the string is the original length.  */
535  in = decode_size(&len, in, in + inLen);
536  if (in == NULL)
537    return svn_error_create(SVN_ERR_SVNDIFF_INVALID_COMPRESSED_DATA, NULL,
538                            _("Decompression of svndiff data failed: no size"));
539  if (len > limit)
540    return svn_error_create(SVN_ERR_SVNDIFF_INVALID_COMPRESSED_DATA, NULL,
541                            _("Decompression of svndiff data failed: "
542                              "size too large"));
543  /* We need to subtract the size of the encoded original length off the
544   *      still remaining input length.  */
545  inLen -= (in - oldplace);
546  if (inLen == len)
547    {
548      svn_stringbuf_ensure(out, len);
549      memcpy(out->data, in, len);
550      out->data[len] = 0;
551      out->len = len;
552
553      return SVN_NO_ERROR;
554    }
555  else
556    {
557      unsigned long zlen = len;
558      int zerr;
559
560      svn_stringbuf_ensure(out, len);
561      zerr = uncompress((unsigned char *)out->data, &zlen, in, inLen);
562      if (zerr != Z_OK)
563        return svn_error_trace(svn_error__wrap_zlib(
564                                 zerr, "uncompress",
565                                 _("Decompression of svndiff data failed")));
566
567      /* Zlib should not produce something that has a different size than the
568         original length we stored. */
569      if (zlen != len)
570        return svn_error_create(SVN_ERR_SVNDIFF_INVALID_COMPRESSED_DATA,
571                                NULL,
572                                _("Size of uncompressed data "
573                                  "does not match stored original length"));
574      out->data[zlen] = 0;
575      out->len = zlen;
576    }
577  return SVN_NO_ERROR;
578}
579
580/* Decode an instruction into OP, returning a pointer to the text
581   after the instruction.  Note that if the action code is
582   svn_txdelta_new, the offset field of *OP will not be set.  */
583static const unsigned char *
584decode_instruction(svn_txdelta_op_t *op,
585                   const unsigned char *p,
586                   const unsigned char *end)
587{
588  apr_size_t c;
589  apr_size_t action;
590
591  if (p == end)
592    return NULL;
593
594  /* We need this more than once */
595  c = *p++;
596
597  /* Decode the instruction selector.  */
598  action = (c >> 6) & 0x3;
599  if (action >= 0x3)
600      return NULL;
601
602  /* This relies on enum svn_delta_action values to match and never to be
603     redefined. */
604  op->action_code = (enum svn_delta_action)(action);
605
606  /* Decode the length and offset.  */
607  op->length = c & 0x3f;
608  if (op->length == 0)
609    {
610      p = decode_size(&op->length, p, end);
611      if (p == NULL)
612        return NULL;
613    }
614  if (action != svn_txdelta_new)
615    {
616      p = decode_size(&op->offset, p, end);
617      if (p == NULL)
618        return NULL;
619    }
620
621  return p;
622}
623
624/* Count the instructions in the range [P..END-1] and make sure they
625   are valid for the given window lengths.  Return an error if the
626   instructions are invalid; otherwise set *NINST to the number of
627   instructions.  */
628static svn_error_t *
629count_and_verify_instructions(int *ninst,
630                              const unsigned char *p,
631                              const unsigned char *end,
632                              apr_size_t sview_len,
633                              apr_size_t tview_len,
634                              apr_size_t new_len)
635{
636  int n = 0;
637  svn_txdelta_op_t op;
638  apr_size_t tpos = 0, npos = 0;
639
640  while (p < end)
641    {
642      p = decode_instruction(&op, p, end);
643
644      /* Detect any malformed operations from the instruction stream. */
645      if (p == NULL)
646        return svn_error_createf
647          (SVN_ERR_SVNDIFF_INVALID_OPS, NULL,
648           _("Invalid diff stream: insn %d cannot be decoded"), n);
649      else if (op.length == 0)
650        return svn_error_createf
651          (SVN_ERR_SVNDIFF_INVALID_OPS, NULL,
652           _("Invalid diff stream: insn %d has length zero"), n);
653      else if (op.length > tview_len - tpos)
654        return svn_error_createf
655          (SVN_ERR_SVNDIFF_INVALID_OPS, NULL,
656           _("Invalid diff stream: insn %d overflows the target view"), n);
657
658      switch (op.action_code)
659        {
660        case svn_txdelta_source:
661          if (op.length > sview_len - op.offset ||
662              op.offset > sview_len)
663            return svn_error_createf
664              (SVN_ERR_SVNDIFF_INVALID_OPS, NULL,
665               _("Invalid diff stream: "
666                 "[src] insn %d overflows the source view"), n);
667          break;
668        case svn_txdelta_target:
669          if (op.offset >= tpos)
670            return svn_error_createf
671              (SVN_ERR_SVNDIFF_INVALID_OPS, NULL,
672               _("Invalid diff stream: "
673                 "[tgt] insn %d starts beyond the target view position"), n);
674          break;
675        case svn_txdelta_new:
676          if (op.length > new_len - npos)
677            return svn_error_createf
678              (SVN_ERR_SVNDIFF_INVALID_OPS, NULL,
679               _("Invalid diff stream: "
680                 "[new] insn %d overflows the new data section"), n);
681          npos += op.length;
682          break;
683        }
684      tpos += op.length;
685      n++;
686    }
687  if (tpos != tview_len)
688    return svn_error_create(SVN_ERR_SVNDIFF_INVALID_OPS, NULL,
689                            _("Delta does not fill the target window"));
690  if (npos != new_len)
691    return svn_error_create(SVN_ERR_SVNDIFF_INVALID_OPS, NULL,
692                            _("Delta does not contain enough new data"));
693
694  *ninst = n;
695  return SVN_NO_ERROR;
696}
697
698/* Given the five integer fields of a window header and a pointer to
699   the remainder of the window contents, fill in a delta window
700   structure *WINDOW.  New allocations will be performed in POOL;
701   the new_data field of *WINDOW will refer directly to memory pointed
702   to by DATA. */
703static svn_error_t *
704decode_window(svn_txdelta_window_t *window, svn_filesize_t sview_offset,
705              apr_size_t sview_len, apr_size_t tview_len, apr_size_t inslen,
706              apr_size_t newlen, const unsigned char *data, apr_pool_t *pool,
707              unsigned int version)
708{
709  const unsigned char *insend;
710  int ninst;
711  apr_size_t npos;
712  svn_txdelta_op_t *ops, *op;
713  svn_string_t *new_data = apr_palloc(pool, sizeof(*new_data));
714
715  window->sview_offset = sview_offset;
716  window->sview_len = sview_len;
717  window->tview_len = tview_len;
718
719  insend = data + inslen;
720
721  if (version == 1)
722    {
723      svn_stringbuf_t *instout = svn_stringbuf_create_empty(pool);
724      svn_stringbuf_t *ndout = svn_stringbuf_create_empty(pool);
725
726      SVN_ERR(zlib_decode(insend, newlen, ndout,
727                          SVN_DELTA_WINDOW_SIZE));
728      SVN_ERR(zlib_decode(data, insend - data, instout,
729                          MAX_INSTRUCTION_SECTION_LEN));
730
731      newlen = ndout->len;
732      data = (unsigned char *)instout->data;
733      insend = (unsigned char *)instout->data + instout->len;
734
735      new_data->data = (const char *) ndout->data;
736      new_data->len = newlen;
737    }
738  else
739    {
740      /* Copy the data because an svn_string_t must have the invariant
741         data[len]=='\0'. */
742      char *buf = apr_palloc(pool, newlen + 1);
743
744      memcpy(buf, insend, newlen);
745      buf[newlen] = '\0';
746      new_data->data = buf;
747      new_data->len = newlen;
748    }
749
750  /* Count the instructions and make sure they are all valid.  */
751  SVN_ERR(count_and_verify_instructions(&ninst, data, insend,
752                                        sview_len, tview_len, newlen));
753
754  /* Allocate a buffer for the instructions and decode them. */
755  ops = apr_palloc(pool, ninst * sizeof(*ops));
756  npos = 0;
757  window->src_ops = 0;
758  for (op = ops; op < ops + ninst; op++)
759    {
760      data = decode_instruction(op, data, insend);
761      if (op->action_code == svn_txdelta_source)
762        ++window->src_ops;
763      else if (op->action_code == svn_txdelta_new)
764        {
765          op->offset = npos;
766          npos += op->length;
767        }
768    }
769  SVN_ERR_ASSERT(data == insend);
770
771  window->ops = ops;
772  window->num_ops = ninst;
773  window->new_data = new_data;
774
775  return SVN_NO_ERROR;
776}
777
778static svn_error_t *
779write_handler(void *baton,
780              const char *buffer,
781              apr_size_t *len)
782{
783  struct decode_baton *db = (struct decode_baton *) baton;
784  const unsigned char *p, *end;
785  svn_filesize_t sview_offset;
786  apr_size_t sview_len, tview_len, inslen, newlen, remaining;
787  apr_size_t buflen = *len;
788
789  /* Chew up four bytes at the beginning for the header.  */
790  if (db->header_bytes < 4)
791    {
792      apr_size_t nheader = 4 - db->header_bytes;
793      if (nheader > buflen)
794        nheader = buflen;
795      if (memcmp(buffer, "SVN\0" + db->header_bytes, nheader) == 0)
796        db->version = 0;
797      else if (memcmp(buffer, "SVN\1" + db->header_bytes, nheader) == 0)
798        db->version = 1;
799      else
800        return svn_error_create(SVN_ERR_SVNDIFF_INVALID_HEADER, NULL,
801                                _("Svndiff has invalid header"));
802      buflen -= nheader;
803      buffer += nheader;
804      db->header_bytes += nheader;
805    }
806
807  /* Concatenate the old with the new.  */
808  svn_stringbuf_appendbytes(db->buffer, buffer, buflen);
809
810  /* We have a buffer of svndiff data that might be good for:
811
812     a) an integral number of windows' worth of data - this is a
813        trivial case.  Make windows from our data and ship them off.
814
815     b) a non-integral number of windows' worth of data - we shall
816        consume the integral portion of the window data, and then
817        somewhere in the following loop the decoding of the svndiff
818        data will run out of stuff to decode, and will simply return
819        SVN_NO_ERROR, anxiously awaiting more data.
820  */
821
822  while (1)
823    {
824      apr_pool_t *newpool;
825      svn_txdelta_window_t window;
826
827      /* Read the header, if we have enough bytes for that.  */
828      p = (const unsigned char *) db->buffer->data;
829      end = (const unsigned char *) db->buffer->data + db->buffer->len;
830
831      p = decode_file_offset(&sview_offset, p, end);
832      if (p == NULL)
833        break;
834
835      p = decode_size(&sview_len, p, end);
836      if (p == NULL)
837        break;
838
839      p = decode_size(&tview_len, p, end);
840      if (p == NULL)
841        break;
842
843      p = decode_size(&inslen, p, end);
844      if (p == NULL)
845        break;
846
847      p = decode_size(&newlen, p, end);
848      if (p == NULL)
849        break;
850
851      if (tview_len > SVN_DELTA_WINDOW_SIZE ||
852          sview_len > SVN_DELTA_WINDOW_SIZE ||
853          /* for svndiff1, newlen includes the original length */
854          newlen > SVN_DELTA_WINDOW_SIZE + MAX_ENCODED_INT_LEN ||
855          inslen > MAX_INSTRUCTION_SECTION_LEN)
856        return svn_error_create(SVN_ERR_SVNDIFF_CORRUPT_WINDOW, NULL,
857                                _("Svndiff contains a too-large window"));
858
859      /* Check for integer overflow.  */
860      if (sview_offset < 0 || inslen + newlen < inslen
861          || sview_len + tview_len < sview_len
862          || (apr_size_t)sview_offset + sview_len < (apr_size_t)sview_offset)
863        return svn_error_create(SVN_ERR_SVNDIFF_CORRUPT_WINDOW, NULL,
864                                _("Svndiff contains corrupt window header"));
865
866      /* Check for source windows which slide backwards.  */
867      if (sview_len > 0
868          && (sview_offset < db->last_sview_offset
869              || (sview_offset + sview_len
870                  < db->last_sview_offset + db->last_sview_len)))
871        return svn_error_create
872          (SVN_ERR_SVNDIFF_BACKWARD_VIEW, NULL,
873           _("Svndiff has backwards-sliding source views"));
874
875      /* Wait for more data if we don't have enough bytes for the
876         whole window.  */
877      if ((apr_size_t) (end - p) < inslen + newlen)
878        return SVN_NO_ERROR;
879
880      /* Decode the window and send it off. */
881      SVN_ERR(decode_window(&window, sview_offset, sview_len, tview_len,
882                            inslen, newlen, p, db->subpool,
883                            db->version));
884      SVN_ERR(db->consumer_func(&window, db->consumer_baton));
885
886      /* Make a new subpool and buffer, saving aside the remaining
887         data in the old buffer.  */
888      newpool = svn_pool_create(db->pool);
889      p += inslen + newlen;
890      remaining = db->buffer->data + db->buffer->len - (const char *) p;
891      db->buffer =
892        svn_stringbuf_ncreate((const char *) p, remaining, newpool);
893
894      /* Remember the offset and length of the source view for next time.  */
895      db->last_sview_offset = sview_offset;
896      db->last_sview_len = sview_len;
897
898      /* We've copied stuff out of the old pool. Toss that pool and use
899         our new pool.
900         ### might be nice to avoid the copy and just use svn_pool_clear
901         ### to get rid of whatever the "other stuff" is. future project...
902      */
903      svn_pool_destroy(db->subpool);
904      db->subpool = newpool;
905    }
906
  /* At this point we have processed all complete windows and DB->BUFFER is
     empty or contains a partially read window header.
     Check that the unprocessed data is not larger than the theoretical
     maximum window header size. */
911  if (db->buffer->len > 5 * MAX_ENCODED_INT_LEN)
912    return svn_error_create(SVN_ERR_SVNDIFF_CORRUPT_WINDOW, NULL,
913                            _("Svndiff contains a too-large window header"));
914
915  return SVN_NO_ERROR;
916}
917
918/* Minimal svn_stream_t write handler, doing nothing */
919static svn_error_t *
920noop_write_handler(void *baton,
921                   const char *buffer,
922                   apr_size_t *len)
923{
924  return SVN_NO_ERROR;
925}
926
927static svn_error_t *
928close_handler(void *baton)
929{
930  struct decode_baton *db = (struct decode_baton *) baton;
931  svn_error_t *err;
932
933  /* Make sure that we're at a plausible end of stream, returning an
934     error if we are expected to do so.  */
935  if ((db->error_on_early_close)
936      && (db->header_bytes < 4 || db->buffer->len != 0))
937    return svn_error_create(SVN_ERR_SVNDIFF_UNEXPECTED_END, NULL,
938                            _("Unexpected end of svndiff input"));
939
940  /* Tell the window consumer that we're done, and clean up.  */
941  err = db->consumer_func(NULL, db->consumer_baton);
942  svn_pool_destroy(db->pool);
943  return err;
944}
945
946
947svn_stream_t *
948svn_txdelta_parse_svndiff(svn_txdelta_window_handler_t handler,
949                          void *handler_baton,
950                          svn_boolean_t error_on_early_close,
951                          apr_pool_t *pool)
952{
953  apr_pool_t *subpool = svn_pool_create(pool);
954  struct decode_baton *db = apr_palloc(pool, sizeof(*db));
955  svn_stream_t *stream;
956
957  db->consumer_func = handler;
958  db->consumer_baton = handler_baton;
959  db->pool = subpool;
960  db->subpool = svn_pool_create(subpool);
961  db->buffer = svn_stringbuf_create_empty(db->subpool);
962  db->last_sview_offset = 0;
963  db->last_sview_len = 0;
964  db->header_bytes = 0;
965  db->error_on_early_close = error_on_early_close;
966  stream = svn_stream_create(db, pool);
967
968  if (handler != svn_delta_noop_window_handler)
969    {
970      svn_stream_set_write(stream, write_handler);
971      svn_stream_set_close(stream, close_handler);
972    }
973  else
974    {
975      /* And else we just ignore everything as efficiently as we can.
976         by only hooking a no-op handler */
977      svn_stream_set_write(stream, noop_write_handler);
978    }
979  return stream;
980}
981
982
983/* Routines for reading one svndiff window at a time. */
984
985/* Read one byte from STREAM into *BYTE. */
986static svn_error_t *
987read_one_byte(unsigned char *byte, svn_stream_t *stream)
988{
989  char c;
990  apr_size_t len = 1;
991
992  SVN_ERR(svn_stream_read(stream, &c, &len));
993  if (len == 0)
994    return svn_error_create(SVN_ERR_SVNDIFF_UNEXPECTED_END, NULL,
995                            _("Unexpected end of svndiff input"));
996  *byte = (unsigned char) c;
997  return SVN_NO_ERROR;
998}
999
1000/* Read and decode one integer from STREAM into *SIZE. */
1001static svn_error_t *
1002read_one_size(apr_size_t *size, svn_stream_t *stream)
1003{
1004  unsigned char c;
1005
1006  *size = 0;
1007  while (1)
1008    {
1009      SVN_ERR(read_one_byte(&c, stream));
1010      *size = (*size << 7) | (c & 0x7f);
1011      if (!(c & 0x80))
1012        break;
1013    }
1014  return SVN_NO_ERROR;
1015}
1016
1017/* Read a window header from STREAM and check it for integer overflow. */
1018static svn_error_t *
1019read_window_header(svn_stream_t *stream, svn_filesize_t *sview_offset,
1020                   apr_size_t *sview_len, apr_size_t *tview_len,
1021                   apr_size_t *inslen, apr_size_t *newlen)
1022{
1023  unsigned char c;
1024
1025  /* Read the source view offset by hand, since it's not an apr_size_t. */
1026  *sview_offset = 0;
1027  while (1)
1028    {
1029      SVN_ERR(read_one_byte(&c, stream));
1030      *sview_offset = (*sview_offset << 7) | (c & 0x7f);
1031      if (!(c & 0x80))
1032        break;
1033    }
1034
1035  /* Read the four size fields. */
1036  SVN_ERR(read_one_size(sview_len, stream));
1037  SVN_ERR(read_one_size(tview_len, stream));
1038  SVN_ERR(read_one_size(inslen, stream));
1039  SVN_ERR(read_one_size(newlen, stream));
1040
1041  if (*tview_len > SVN_DELTA_WINDOW_SIZE ||
1042      *sview_len > SVN_DELTA_WINDOW_SIZE ||
1043      /* for svndiff1, newlen includes the original length */
1044      *newlen > SVN_DELTA_WINDOW_SIZE + MAX_ENCODED_INT_LEN ||
1045      *inslen > MAX_INSTRUCTION_SECTION_LEN)
1046    return svn_error_create(SVN_ERR_SVNDIFF_CORRUPT_WINDOW, NULL,
1047                            _("Svndiff contains a too-large window"));
1048
1049  /* Check for integer overflow.  */
1050  if (*sview_offset < 0 || *inslen + *newlen < *inslen
1051      || *sview_len + *tview_len < *sview_len
1052      || (apr_size_t)*sview_offset + *sview_len < (apr_size_t)*sview_offset)
1053    return svn_error_create(SVN_ERR_SVNDIFF_CORRUPT_WINDOW, NULL,
1054                            _("Svndiff contains corrupt window header"));
1055
1056  return SVN_NO_ERROR;
1057}
1058
1059svn_error_t *
1060svn_txdelta_read_svndiff_window(svn_txdelta_window_t **window,
1061                                svn_stream_t *stream,
1062                                int svndiff_version,
1063                                apr_pool_t *pool)
1064{
1065  svn_filesize_t sview_offset;
1066  apr_size_t sview_len, tview_len, inslen, newlen, len;
1067  unsigned char *buf;
1068
1069  SVN_ERR(read_window_header(stream, &sview_offset, &sview_len, &tview_len,
1070                             &inslen, &newlen));
1071  len = inslen + newlen;
1072  buf = apr_palloc(pool, len);
1073  SVN_ERR(svn_stream_read(stream, (char*)buf, &len));
1074  if (len < inslen + newlen)
1075    return svn_error_create(SVN_ERR_SVNDIFF_UNEXPECTED_END, NULL,
1076                            _("Unexpected end of svndiff input"));
1077  *window = apr_palloc(pool, sizeof(**window));
1078  return decode_window(*window, sview_offset, sview_len, tview_len, inslen,
1079                       newlen, buf, pool, svndiff_version);
1080}
1081
1082
1083svn_error_t *
1084svn_txdelta_skip_svndiff_window(apr_file_t *file,
1085                                int svndiff_version,
1086                                apr_pool_t *pool)
1087{
1088  svn_stream_t *stream = svn_stream_from_aprfile2(file, TRUE, pool);
1089  svn_filesize_t sview_offset;
1090  apr_size_t sview_len, tview_len, inslen, newlen;
1091  apr_off_t offset;
1092
1093  SVN_ERR(read_window_header(stream, &sview_offset, &sview_len, &tview_len,
1094                             &inslen, &newlen));
1095
1096  offset = inslen + newlen;
1097  return svn_io_file_seek(file, APR_CUR, &offset, pool);
1098}
1099
1100
1101svn_error_t *
1102svn__compress(svn_string_t *in,
1103              svn_stringbuf_t *out,
1104              int compression_level)
1105{
1106  return zlib_encode(in->data, in->len, out, compression_level);
1107}
1108
1109svn_error_t *
1110svn__decompress(svn_string_t *in,
1111                svn_stringbuf_t *out,
1112                apr_size_t limit)
1113{
1114  return zlib_decode((const unsigned char*)in->data, in->len, out, limit);
1115}
1116