packed_data.c revision 299742
1/* packed_data.c : implement the packed binary stream data structure
2 *
3 * ====================================================================
4 *    Licensed to the Apache Software Foundation (ASF) under one
5 *    or more contributor license agreements.  See the NOTICE file
6 *    distributed with this work for additional information
7 *    regarding copyright ownership.  The ASF licenses this file
8 *    to you under the Apache License, Version 2.0 (the
9 *    "License"); you may not use this file except in compliance
10 *    with the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *    Unless required by applicable law or agreed to in writing,
15 *    software distributed under the License is distributed on an
16 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 *    KIND, either express or implied.  See the License for the
18 *    specific language governing permissions and limitations
19 *    under the License.
20 * ====================================================================
21 */
22
23#include <apr_tables.h>
24
25#include "svn_string.h"
26#include "svn_sorts.h"
27#include "private/svn_string_private.h"
28#include "private/svn_subr_private.h"
29#include "private/svn_delta_private.h"
30#include "private/svn_packed_data.h"
31
32#include "svn_private_config.h"
33
34
35
36/* Private int stream data referenced by svn_packed__int_stream_t.
37 */
38typedef struct packed_int_private_t
39{
40  /* First sub-stream, if any.  NULL otherwise. */
41  svn_packed__int_stream_t *first_substream;
42
43  /* Last sub-stream, if any.  NULL otherwise. */
44  svn_packed__int_stream_t *last_substream;
45
46  /* Current sub-stream to read from / write to, if any.  NULL otherwise.
47     This will be initialized to FIRST_SUBSTREAM and then advanced in a
48     round-robin scheme after each number being read. */
49  svn_packed__int_stream_t *current_substream;
50
51  /* Number of sub-streams. */
52  apr_size_t substream_count;
53
54  /* Next (sibling) integer stream.  If this is the last one, points to
55     the first in the list (i.e. this forms a ring list).  Never NULL. */
56  svn_packed__int_stream_t *next;
57
58  /* 7b/8b encoded integer values (previously diff'ed and sign-handled,
59     if indicated by the flags below).  The contents are disjoint from
60     the unparsed number buffer.  May be NULL while not written to. */
61  svn_stringbuf_t *packed;
62
63  /* Initialized to 0.  Latest value written to / read from PACKED.
64     Undefined if DIFF is FALSE. */
65  apr_uint64_t last_value;
66
67  /* Deltify data before storing it in PACKED. */
68  svn_boolean_t diff;
69
70  /* Numbers are likely to contain negative values with small absolutes.
71     If TRUE, store the signed bit in LSB before encoding. */
72  svn_boolean_t is_signed;
73
74  /* Number of integers in this stream. */
75  apr_size_t item_count;
76
77  /* TRUE for the last stream in a list of siblings. */
78  svn_boolean_t is_last;
79
80  /* Pool to use for allocations. */
81  apr_pool_t *pool;
82} packed_int_private_t;
83
84/* A byte sequence stream.  Please note that NEXT is defined different
85 * from the NEXT member in integer streams.
86 */
87struct svn_packed__byte_stream_t
88{
89  /* First sub-stream, if any.  NULL otherwise. */
90  svn_packed__byte_stream_t *first_substream;
91
92  /* Last sub-stream, if any.  NULL otherwise. */
93  svn_packed__byte_stream_t *last_substream;
94
95  /* Next (sibling) byte sequence stream, if any.  NULL otherwise. */
96  svn_packed__byte_stream_t *next;
97
98  /* Stream to store the sequence lengths. */
99  svn_packed__int_stream_t *lengths_stream;
100
101  /* It's index (relative to its parent). */
102  apr_size_t lengths_stream_index;
103
104  /* Concatenated byte sequences. */
105  svn_stringbuf_t *packed;
106
107  /* Pool to use for allocations. */
108  apr_pool_t *pool;
109};
110
111/* The serialization root object.  It references the top-level streams.
112 */
113struct svn_packed__data_root_t
114{
115  /* First top-level integer stream, if any.  NULL otherwise. */
116  svn_packed__int_stream_t *first_int_stream;
117
118  /* Last top-level integer stream, if any.  NULL otherwise. */
119  svn_packed__int_stream_t *last_int_stream;
120
121  /* Number of top-level integer streams. */
122  apr_size_t int_stream_count;
123
124  /* First top-level byte sequence stream, if any.  NULL otherwise. */
125  svn_packed__byte_stream_t *first_byte_stream;
126
127  /* Last top-level byte sequence stream, if any.  NULL otherwise. */
128  svn_packed__byte_stream_t *last_byte_stream;
129
130  /* Number of top-level byte sequence streams. */
131  apr_size_t byte_stream_count;
132
133  /* Pool to use for allocations. */
134  apr_pool_t *pool;
135};
136
137/* Write access. */
138
139svn_packed__data_root_t *
140svn_packed__data_create_root(apr_pool_t *pool)
141{
142  svn_packed__data_root_t *root = apr_pcalloc(pool, sizeof(*root));
143  root->pool = pool;
144
145  return root;
146}
147
148svn_packed__int_stream_t *
149svn_packed__create_int_stream(svn_packed__data_root_t *root,
150                              svn_boolean_t diff,
151                              svn_boolean_t signed_ints)
152{
153  /* allocate and initialize the stream node */
154  packed_int_private_t *private_data
155    = apr_pcalloc(root->pool, sizeof(*private_data));
156  svn_packed__int_stream_t *stream
157    = apr_palloc(root->pool, sizeof(*stream));
158
159  private_data->diff = diff;
160  private_data->is_signed = signed_ints;
161  private_data->is_last = TRUE;
162  private_data->pool = root->pool;
163
164  stream->buffer_used = 0;
165  stream->private_data = private_data;
166
167  /* maintain the ring list */
168  if (root->last_int_stream)
169    {
170      packed_int_private_t *previous_private_data
171        = root->last_int_stream->private_data;
172      previous_private_data->next = stream;
173      previous_private_data->is_last = FALSE;
174    }
175  else
176    {
177      root->first_int_stream = stream;
178    }
179
180  root->last_int_stream = stream;
181  root->int_stream_count++;
182
183  return stream;
184}
185
186svn_packed__int_stream_t *
187svn_packed__create_int_substream(svn_packed__int_stream_t *parent,
188                                 svn_boolean_t diff,
189                                 svn_boolean_t signed_ints)
190{
191  packed_int_private_t *parent_private = parent->private_data;
192
193  /* allocate and initialize the stream node */
194  packed_int_private_t *private_data
195    = apr_pcalloc(parent_private->pool, sizeof(*private_data));
196  svn_packed__int_stream_t *stream
197    = apr_palloc(parent_private->pool, sizeof(*stream));
198
199  private_data->diff = diff;
200  private_data->is_signed = signed_ints;
201  private_data->is_last = TRUE;
202  private_data->pool = parent_private->pool;
203
204  stream->buffer_used = 0;
205  stream->private_data = private_data;
206
207  /* maintain the ring list */
208  if (parent_private->last_substream)
209    {
210      packed_int_private_t *previous_private_data
211        = parent_private->last_substream->private_data;
212      previous_private_data->next = stream;
213      previous_private_data->is_last = FALSE;
214    }
215  else
216    {
217      parent_private->first_substream = stream;
218      parent_private->current_substream = stream;
219    }
220
221  parent_private->last_substream = stream;
222  parent_private->substream_count++;
223  private_data->next = parent_private->first_substream;
224
225  return stream;
226}
227
228/* Returns a new top-level byte sequence stream for ROOT but does not
229 * initialize the LENGTH_STREAM member.
230 */
231static svn_packed__byte_stream_t *
232create_bytes_stream_body(svn_packed__data_root_t *root)
233{
234  svn_packed__byte_stream_t *stream
235    = apr_pcalloc(root->pool, sizeof(*stream));
236
237  stream->packed = svn_stringbuf_create_empty(root->pool);
238
239  if (root->last_byte_stream)
240    root->last_byte_stream->next = stream;
241  else
242    root->first_byte_stream = stream;
243
244  root->last_byte_stream = stream;
245  root->byte_stream_count++;
246
247  return stream;
248}
249
250svn_packed__byte_stream_t *
251svn_packed__create_bytes_stream(svn_packed__data_root_t *root)
252{
253  svn_packed__byte_stream_t *stream
254    = create_bytes_stream_body(root);
255
256  stream->lengths_stream_index = root->int_stream_count;
257  stream->lengths_stream = svn_packed__create_int_stream(root, FALSE, FALSE);
258
259  return stream;
260}
261
262/* Write the 7b/8b representation of VALUE into BUFFER.  BUFFER must
263 * provide at least 10 bytes space.
264 * Returns the first position behind the written data.
265 */
266static unsigned char *
267write_packed_uint_body(unsigned char *buffer, apr_uint64_t value)
268{
269  while (value >= 0x80)
270    {
271      *(buffer++) = (unsigned char)((value % 0x80) + 0x80);
272      value /= 0x80;
273    }
274
275  *(buffer++) = (unsigned char)value;
276  return buffer;
277}
278
279/* Return remapped VALUE.
280 *
281 * Due to sign conversion and diff underflow, values close to UINT64_MAX
282 * are almost as frequent as those close to 0.  Remap them such that the
283 * MSB is stored in the LSB and the remainder stores the absolute distance
284 * to 0.
285 *
286 * This minimizes the absolute value to store in many scenarios.
287 * Hence, the variable-length representation on disk is shorter, too.
288 */
289static apr_uint64_t
290remap_uint(apr_uint64_t value)
291{
292  return value & APR_UINT64_C(0x8000000000000000)
293       ? APR_UINT64_MAX - (2 * value)
294       : 2 * value;
295}
296
297/* Invert remap_uint. */
298static apr_uint64_t
299unmap_uint(apr_uint64_t value)
300{
301  return value & 1
302       ? (APR_UINT64_MAX - value / 2)
303       : value / 2;
304}
305
306/* Empty the unprocessed integer buffer in STREAM by either pushing the
307 * data to the sub-streams or writing to the packed data (in case there
308 * are no sub-streams).
309 */
310static void
311svn_packed__data_flush_buffer(svn_packed__int_stream_t *stream)
312{
313  packed_int_private_t *private_data = stream->private_data;
314  apr_size_t i;
315
316  /* if we have sub-streams, push the data down to them */
317  if (private_data->current_substream)
318    for (i = 0; i < stream->buffer_used; ++i)
319      {
320        packed_int_private_t *current_private_data
321          = private_data->current_substream->private_data;
322
323        svn_packed__add_uint(private_data->current_substream,
324                             stream->buffer[i]);
325        private_data->current_substream = current_private_data->next;
326      }
327  else
328    {
329      /* pack the numbers into our local PACKED buffer */
330
331      /* temporary buffer, max 10 bytes required per 7b/8b encoded number */
332      unsigned char local_buffer[10 * SVN__PACKED_DATA_BUFFER_SIZE];
333      unsigned char *p = local_buffer;
334
335      /* if configured, deltify numbers before packing them.
336         Since delta may be negative, always use the 'signed' encoding. */
337      if (private_data->diff)
338        {
339          apr_uint64_t last_value = private_data->last_value;
340          for (i = 0; i < stream->buffer_used; ++i)
341            {
342              apr_uint64_t temp = stream->buffer[i];
343              stream->buffer[i] = remap_uint(temp - last_value);
344              last_value = temp;
345            }
346
347          private_data->last_value = last_value;
348        }
349
350      /* if configured and not already done by the deltification above,
351         transform to 'signed' encoding.  Store the sign in the LSB and
352         the absolute value (-1 for negative values) in the remaining
353         63 bits. */
354      if (!private_data->diff && private_data->is_signed)
355        for (i = 0; i < stream->buffer_used; ++i)
356          stream->buffer[i] = remap_uint(stream->buffer[i]);
357
358      /* auto-create packed data buffer.  Give it some reasonable initial
359         size - just enough for a few tens of values. */
360      if (private_data->packed == NULL)
361        private_data->packed
362          = svn_stringbuf_create_ensure(256, private_data->pool);
363
364      /* encode numbers into our temp buffer. */
365      for (i = 0; i < stream->buffer_used; ++i)
366        p = write_packed_uint_body(p, stream->buffer[i]);
367
368      /* append them to the final packed data */
369      svn_stringbuf_appendbytes(private_data->packed,
370                                (char *)local_buffer,
371                                p - local_buffer);
372    }
373
374  /* maintain counters */
375  private_data->item_count += stream->buffer_used;
376  stream->buffer_used = 0;
377}
378
379void
380svn_packed__add_uint(svn_packed__int_stream_t *stream,
381                     apr_uint64_t value)
382{
383  stream->buffer[stream->buffer_used] = value;
384  if (++stream->buffer_used == SVN__PACKED_DATA_BUFFER_SIZE)
385    svn_packed__data_flush_buffer(stream);
386}
387
388void
389svn_packed__add_int(svn_packed__int_stream_t *stream,
390                    apr_int64_t value)
391{
392  svn_packed__add_uint(stream, (apr_uint64_t)value);
393}
394
395void
396svn_packed__add_bytes(svn_packed__byte_stream_t *stream,
397                      const char *data,
398                      apr_size_t len)
399{
400  svn_packed__add_uint(stream->lengths_stream, len);
401  svn_stringbuf_appendbytes(stream->packed, data, len);
402}
403
404/* Append the 7b/8b encoded representation of VALUE to PACKED.
405 */
406static void
407write_packed_uint(svn_stringbuf_t* packed, apr_uint64_t value)
408{
409  if (value < 0x80)
410    {
411      svn_stringbuf_appendbyte(packed, (char)value);
412    }
413  else
414    {
415      unsigned char buffer[10];
416      unsigned char *p = write_packed_uint_body(buffer, value);
417
418      svn_stringbuf_appendbytes(packed, (char *)buffer, p - buffer);
419    }
420}
421
422/* Recursively write the structure (config parameters, sub-streams, data
423 * sizes) of the STREAM and all its siblings to the TREE_STRUCT buffer.
424 */
425static void
426write_int_stream_structure(svn_stringbuf_t* tree_struct,
427                           svn_packed__int_stream_t* stream)
428{
429  while (stream)
430    {
431      /* store config parameters and number of sub-streams in 1 number */
432      packed_int_private_t *private_data = stream->private_data;
433      write_packed_uint(tree_struct, (private_data->substream_count << 2)
434                                   + (private_data->diff ? 1 : 0)
435                                   + (private_data->is_signed ? 2 : 0));
436
437      /* store item count and length their of packed representation */
438      svn_packed__data_flush_buffer(stream);
439
440      write_packed_uint(tree_struct, private_data->item_count);
441      write_packed_uint(tree_struct, private_data->packed
442                                   ? private_data->packed->len
443                                   : 0);
444
445      /* append all sub-stream structures */
446      write_int_stream_structure(tree_struct, private_data->first_substream);
447
448      /* continue with next sibling */
449      stream = private_data->is_last ? NULL : private_data->next;
450    }
451}
452
453/* Recursively write the structure (sub-streams, data sizes) of the STREAM
454 * and all its siblings to the TREE_STRUCT buffer.
455 */
456static void
457write_byte_stream_structure(svn_stringbuf_t* tree_struct,
458                            svn_packed__byte_stream_t* stream)
459{
460  /* for this and all siblings */
461  for (; stream; stream = stream->next)
462    {
463      /* this stream's structure and size */
464      write_packed_uint(tree_struct, 0);
465      write_packed_uint(tree_struct, stream->lengths_stream_index);
466      write_packed_uint(tree_struct, stream->packed->len);
467
468      /* followed by all its sub-streams */
469      write_byte_stream_structure(tree_struct, stream->first_substream);
470    }
471}
472
473/* Write the 7b/8b encoded representation of VALUE to STREAM.
474 */
475static svn_error_t *
476write_stream_uint(svn_stream_t *stream,
477                  apr_uint64_t value)
478{
479  unsigned char buffer[10];
480  apr_size_t count = write_packed_uint_body(buffer, value) - buffer;
481
482  SVN_ERR(svn_stream_write(stream, (char *)buffer, &count));
483
484  return SVN_NO_ERROR;
485}
486
487/* Return the total size of all packed data in STREAM, its siblings and
488 * all sub-streams.  To get an accurate value, flush all buffers prior to
489 * calling this function.
490 */
491static apr_size_t
492packed_int_stream_length(svn_packed__int_stream_t *stream)
493{
494  packed_int_private_t *private_data = stream->private_data;
495  apr_size_t result = private_data->packed ? private_data->packed->len : 0;
496
497  stream = private_data->first_substream;
498  while (stream)
499    {
500      private_data = stream->private_data;
501      result += packed_int_stream_length(stream);
502      stream = private_data->is_last ? NULL : private_data->next;
503    }
504
505  return result;
506}
507
508/* Return the total size of all byte sequences data in STREAM, its siblings
509 * and all sub-streams.
510 */
511static apr_size_t
512packed_byte_stream_length(svn_packed__byte_stream_t *stream)
513{
514  apr_size_t result = stream->packed->len;
515
516  for (stream = stream->first_substream; stream; stream = stream->next)
517    result += packed_byte_stream_length(stream);
518
519  return result;
520}
521
522/* Append all packed data in STREAM, its siblings and all sub-streams to
523 * COMBINED.
524 */
525static void
526append_int_stream(svn_packed__int_stream_t *stream,
527                  svn_stringbuf_t *combined)
528{
529  packed_int_private_t *private_data = stream->private_data;
530  if (private_data->packed)
531    svn_stringbuf_appendstr(combined, private_data->packed);
532
533  stream = private_data->first_substream;
534  while (stream)
535    {
536      private_data = stream->private_data;
537      append_int_stream(stream, combined);
538      stream = private_data->is_last ? NULL : private_data->next;
539    }
540}
541
542/* Append all byte sequences in STREAM, its siblings and all sub-streams
543 * to COMBINED.
544 */
545static void
546append_byte_stream(svn_packed__byte_stream_t *stream,
547                   svn_stringbuf_t *combined)
548{
549  svn_stringbuf_appendstr(combined, stream->packed);
550
551  for (stream = stream->first_substream; stream; stream = stream->next)
552    append_byte_stream(stream, combined);
553}
554
555/* Take the binary data in UNCOMPRESSED, zip it into COMPRESSED and write
556 * it to STREAM.  COMPRESSED simply acts as a re-usable memory buffer.
557 * Clear all buffers (COMPRESSED, UNCOMPRESSED) at the end of the function.
558 */
559static svn_error_t *
560write_stream_data(svn_stream_t *stream,
561                  svn_stringbuf_t *uncompressed,
562                  svn_stringbuf_t *compressed)
563{
564  SVN_ERR(svn__compress(uncompressed,
565                        compressed,
566                        SVN_DELTA_COMPRESSION_LEVEL_DEFAULT));
567
568  SVN_ERR(write_stream_uint(stream, compressed->len));
569  SVN_ERR(svn_stream_write(stream, compressed->data, &compressed->len));
570
571  svn_stringbuf_setempty(uncompressed);
572  svn_stringbuf_setempty(compressed);
573
574  return SVN_NO_ERROR;
575}
576
577svn_error_t *
578svn_packed__data_write(svn_stream_t *stream,
579                       svn_packed__data_root_t *root,
580                       apr_pool_t *scratch_pool)
581{
582  svn_packed__int_stream_t *int_stream;
583  svn_packed__byte_stream_t *byte_stream;
584
585  /* re-usable data buffers */
586  svn_stringbuf_t *compressed
587    = svn_stringbuf_create_ensure(1024, scratch_pool);
588  svn_stringbuf_t *uncompressed
589    = svn_stringbuf_create_ensure(1024, scratch_pool);
590
591  /* write tree structure */
592  svn_stringbuf_t *tree_struct
593    = svn_stringbuf_create_ensure(127, scratch_pool);
594
595  write_packed_uint(tree_struct, root->int_stream_count);
596  write_int_stream_structure(tree_struct, root->first_int_stream);
597
598  write_packed_uint(tree_struct, root->byte_stream_count);
599  write_byte_stream_structure(tree_struct, root->first_byte_stream);
600
601  SVN_ERR(write_stream_uint(stream, tree_struct->len));
602  SVN_ERR(svn_stream_write(stream, tree_struct->data, &tree_struct->len));
603
604  /* flatten sub-streams, zip them and write them to disk */
605
606  for (int_stream = root->first_int_stream;
607       int_stream;
608       int_stream = ((packed_int_private_t*)int_stream->private_data)->next)
609    {
610      apr_size_t len = packed_int_stream_length(int_stream);
611      svn_stringbuf_ensure(uncompressed, len);
612
613      append_int_stream(int_stream, uncompressed);
614      SVN_ERR(write_stream_data(stream, uncompressed, compressed));
615    }
616
617  for (byte_stream = root->first_byte_stream;
618       byte_stream;
619       byte_stream = byte_stream->next)
620    {
621      apr_size_t len = packed_byte_stream_length(byte_stream);
622      svn_stringbuf_ensure(uncompressed, len);
623
624      append_byte_stream(byte_stream, uncompressed);
625      SVN_ERR(write_stream_data(stream, uncompressed, compressed));
626    }
627
628  return SVN_NO_ERROR;
629}
630
631
632/* Read access. */
633
634svn_packed__int_stream_t *
635svn_packed__first_int_stream(svn_packed__data_root_t *root)
636{
637  return root->first_int_stream;
638}
639
640svn_packed__byte_stream_t *
641svn_packed__first_byte_stream(svn_packed__data_root_t *root)
642{
643  return root->first_byte_stream;
644}
645
646svn_packed__int_stream_t *
647svn_packed__next_int_stream(svn_packed__int_stream_t *stream)
648{
649  packed_int_private_t *private_data = stream->private_data;
650  return private_data->is_last ? NULL : private_data->next;
651}
652
653svn_packed__byte_stream_t *
654svn_packed__next_byte_stream(svn_packed__byte_stream_t *stream)
655{
656  return stream->next;
657}
658
659svn_packed__int_stream_t *
660svn_packed__first_int_substream(svn_packed__int_stream_t *stream)
661{
662  packed_int_private_t *private_data = stream->private_data;
663  return private_data->first_substream;
664}
665
666apr_size_t
667svn_packed__int_count(svn_packed__int_stream_t *stream)
668{
669  packed_int_private_t *private_data = stream->private_data;
670  return private_data->item_count + stream->buffer_used;
671}
672
673apr_size_t
674svn_packed__byte_count(svn_packed__byte_stream_t *stream)
675{
676  return stream->packed->len;
677}
678
679/* Read one 7b/8b encoded value from *P and return it in *RESULT.  Returns
680 * the first position after the parsed data.
681 *
682 * Overflows will be detected in the sense that it will end parsing the
683 * input but the result is undefined.
684 */
685static unsigned char *
686read_packed_uint_body(unsigned char *p, apr_uint64_t *result)
687{
688  if (*p < 0x80)
689    {
690      *result = *p;
691    }
692  else
693    {
694      apr_uint64_t shift = 0;
695      apr_uint64_t value = 0;
696      while (*p >= 0x80)
697        {
698          value += (apr_uint64_t)(*p & 0x7f) << shift;
699          ++p;
700
701          shift += 7;
702          if (shift > 64)
703            {
704              /* a definite overflow.  Note, that numbers of 65 .. 70
705                 bits will not be detected as an overflow as they don't
706                 threaten to exceed the input buffer. */
707              *result = 0;
708              return p;
709            }
710        }
711
712      *result = value + ((apr_uint64_t)*p << shift);
713    }
714
715  return ++p;
716}
717
718/* Read one 7b/8b encoded value from STREAM and return it in *RESULT.
719 *
720 * Overflows will be detected in the sense that it will end parsing the
721 * input but the result is undefined.
722 */
723static svn_error_t *
724read_stream_uint(svn_stream_t *stream, apr_uint64_t *result)
725{
726  apr_uint64_t shift = 0;
727  apr_uint64_t value = 0;
728  unsigned char c;
729
730  do
731    {
732      apr_size_t len = 1;
733      SVN_ERR(svn_stream_read_full(stream, (char *)&c, &len));
734      if (len != 1)
735        return svn_error_create(SVN_ERR_CORRUPT_PACKED_DATA, NULL,
736                                _("Unexpected end of stream"));
737
738      value += (apr_uint64_t)(c & 0x7f) << shift;
739      shift += 7;
740      if (shift > 64)
741        return svn_error_create(SVN_ERR_CORRUPT_PACKED_DATA, NULL,
742                                _("Integer representation too long"));
743    }
744  while (c >= 0x80);
745
746  *result = value;
747  return SVN_NO_ERROR;
748}
749
750/* Extract and return the next integer from PACKED and make PACKED point
751 * to the next integer.
752 */
753static apr_uint64_t
754read_packed_uint(svn_stringbuf_t *packed)
755{
756  apr_uint64_t result = 0;
757  unsigned char *p = (unsigned char *)packed->data;
758  apr_size_t read = read_packed_uint_body(p, &result) - p;
759
760  if (read > packed->len)
761    read = packed->len;
762
763  packed->data += read;
764  packed->blocksize -= read;
765  packed->len -= read;
766
767  return result;
768}
769
770/* Ensure that STREAM contains at least one item in its buffer.
771 */
772static void
773svn_packed__data_fill_buffer(svn_packed__int_stream_t *stream)
774{
775  packed_int_private_t *private_data = stream->private_data;
776  apr_size_t i;
777  apr_size_t end = MIN(SVN__PACKED_DATA_BUFFER_SIZE,
778                       private_data->item_count);
779
780  /* in case, some user calls us explicitly without a good reason ... */
781  if (stream->buffer_used)
782    return;
783
784  /* can we get data from the sub-streams or do we have to decode it from
785     our local packed container? */
786  if (private_data->current_substream)
787    for (i = end; i > 0; --i)
788      {
789        packed_int_private_t *current_private_data
790          = private_data->current_substream->private_data;
791        stream->buffer[i-1]
792          = svn_packed__get_uint(private_data->current_substream);
793        private_data->current_substream = current_private_data->next;
794      }
795  else
796    {
797      /* use this local buffer only if the packed data is shorter than this.
798         The goal is that read_packed_uint_body doesn't need check for
799         overflows. */
800      unsigned char local_buffer[10 * SVN__PACKED_DATA_BUFFER_SIZE];
801      unsigned char *p;
802      unsigned char *start;
803      apr_size_t packed_read;
804
805      if (private_data->packed->len < sizeof(local_buffer))
806        {
807          apr_size_t trail = sizeof(local_buffer) - private_data->packed->len;
808          memcpy(local_buffer,
809                 private_data->packed->data,
810                 private_data->packed->len);
811          memset(local_buffer + private_data->packed->len, 0, MIN(trail, end));
812
813          p = local_buffer;
814        }
815      else
816        p = (unsigned char *)private_data->packed->data;
817
818      /* unpack numbers */
819      start = p;
820      for (i = end; i > 0; --i)
821        p = read_packed_uint_body(p, &stream->buffer[i-1]);
822
823      /* adjust remaining packed data buffer */
824      packed_read = p - start;
825      private_data->packed->data += packed_read;
826      private_data->packed->len -= packed_read;
827      private_data->packed->blocksize -= packed_read;
828
829      /* undeltify numbers, if configured */
830      if (private_data->diff)
831        {
832          apr_uint64_t last_value = private_data->last_value;
833          for (i = end; i > 0; --i)
834            {
835              last_value += unmap_uint(stream->buffer[i-1]);
836              stream->buffer[i-1] = last_value;
837            }
838
839          private_data->last_value = last_value;
840        }
841
842      /* handle signed values, if configured and not handled already */
843      if (!private_data->diff && private_data->is_signed)
844        for (i = 0; i < end; ++i)
845          stream->buffer[i] = unmap_uint(stream->buffer[i]);
846    }
847
848  stream->buffer_used = end;
849  private_data->item_count -= end;
850}
851
852apr_uint64_t
853svn_packed__get_uint(svn_packed__int_stream_t *stream)
854{
855  if (stream->buffer_used == 0)
856    svn_packed__data_fill_buffer(stream);
857
858  return stream->buffer_used ? stream->buffer[--stream->buffer_used] : 0;
859}
860
861apr_int64_t
862svn_packed__get_int(svn_packed__int_stream_t *stream)
863{
864  return (apr_int64_t)svn_packed__get_uint(stream);
865}
866
867const char *
868svn_packed__get_bytes(svn_packed__byte_stream_t *stream,
869                      apr_size_t *len)
870{
871  const char *result = stream->packed->data;
872  apr_size_t count = (apr_size_t)svn_packed__get_uint(stream->lengths_stream);
873
874  if (count > stream->packed->len)
875    count = stream->packed->len;
876
877  /* advance packed buffer */
878  stream->packed->data += count;
879  stream->packed->len -= count;
880  stream->packed->blocksize -= count;
881
882  *len = count;
883  return result;
884}
885
886/* Read the integer stream structure and recreate it in STREAM, including
887 * sub-streams, from TREE_STRUCT.
888 */
889static void
890read_int_stream_structure(svn_stringbuf_t *tree_struct,
891                          svn_packed__int_stream_t *stream)
892{
893  packed_int_private_t *private_data = stream->private_data;
894  apr_uint64_t value = read_packed_uint(tree_struct);
895  apr_size_t substream_count;
896  apr_size_t i;
897
898  /* extract local parameters */
899  private_data->diff = (value & 1) != 0;
900  private_data->is_signed = (value & 2) != 0;
901  substream_count = (apr_size_t)(value >> 2);
902
903  /* read item count & packed size; allocate packed data buffer */
904  private_data->item_count = (apr_size_t)read_packed_uint(tree_struct);
905  value = read_packed_uint(tree_struct);
906  if (value)
907    {
908      private_data->packed = svn_stringbuf_create_ensure((apr_size_t)value,
909                                                         private_data->pool);
910      private_data->packed->len = (apr_size_t)value;
911    }
912
913  /* add sub-streams and read their config, too */
914  for (i = 0; i < substream_count; ++i)
915    read_int_stream_structure(tree_struct,
916                              svn_packed__create_int_substream(stream,
917                                                               FALSE,
918                                                               FALSE));
919}
920
921/* Read the integer stream structure and recreate it in STREAM, including
922 * sub-streams, from TREE_STRUCT.  FIRST_INT_STREAM is the integer stream
923 * that would correspond to lengths_stream_index 0.
924 */
925static void
926read_byte_stream_structure(svn_stringbuf_t *tree_struct,
927                           svn_packed__byte_stream_t *stream,
928                           svn_packed__int_stream_t *first_int_stream)
929{
930  apr_size_t lengths_stream_index;
931  apr_size_t packed_size;
932  apr_size_t i;
933
934  /* read parameters from the TREE_STRUCT buffer */
935  (void) (apr_size_t)read_packed_uint(tree_struct); /* discard first uint */
936  lengths_stream_index = (apr_size_t)read_packed_uint(tree_struct);
937  packed_size = (apr_size_t)read_packed_uint(tree_struct);
938
939  /* allocate byte sequence buffer size */
940  svn_stringbuf_ensure(stream->packed, packed_size);
941  stream->packed->len = packed_size;
942
943  /* navigate to the (already existing) lengths_stream */
944  stream->lengths_stream_index = lengths_stream_index;
945  stream->lengths_stream = first_int_stream;
946  for (i = 0; i < lengths_stream_index; ++i)
947    {
948      packed_int_private_t *length_private
949        = stream->lengths_stream->private_data;
950      stream->lengths_stream = length_private->next;
951    }
952}
953
954/* Read a compressed block from STREAM and uncompress it into UNCOMPRESSED.
955 * UNCOMPRESSED_LEN is the expected size of the stream.  COMPRESSED is a
956 * re-used buffer for temporary data.
957 */
958static svn_error_t *
959read_stream_data(svn_stream_t *stream,
960                 apr_size_t uncompressed_len,
961                 svn_stringbuf_t *uncompressed,
962                 svn_stringbuf_t *compressed)
963{
964  apr_uint64_t len;
965  apr_size_t compressed_len;
966
967  SVN_ERR(read_stream_uint(stream, &len));
968  compressed_len = (apr_size_t)len;
969
970  svn_stringbuf_ensure(compressed, compressed_len);
971  compressed->len = compressed_len;
972  SVN_ERR(svn_stream_read_full(stream, compressed->data, &compressed->len));
973  compressed->data[compressed_len] = '\0';
974
975  SVN_ERR(svn__decompress(compressed, uncompressed, uncompressed_len));
976
977  return SVN_NO_ERROR;
978}
979
980/* Read the packed contents from COMBINED, starting at *OFFSET and store
981 * it in STREAM.  Update *OFFSET to point to the next stream's data and
982 * continue with the sub-streams.
983 */
984static void
985unflatten_int_stream(svn_packed__int_stream_t *stream,
986                     svn_stringbuf_t *combined,
987                     apr_size_t *offset)
988{
989  packed_int_private_t *private_data = stream->private_data;
990  if (private_data->packed)
991    {
992      memcpy(private_data->packed->data,
993             combined->data + *offset,
994             private_data->packed->len);
995
996      private_data->packed->data[private_data->packed->len] = '\0';
997      *offset += private_data->packed->len;
998    }
999
1000  stream = private_data->first_substream;
1001  while (stream)
1002    {
1003      private_data = stream->private_data;
1004      unflatten_int_stream(stream, combined, offset);
1005      stream = private_data->is_last ? NULL : private_data->next;
1006    }
1007}
1008
1009/* Read the packed contents from COMBINED, starting at *OFFSET and store
1010 * it in STREAM.  Update *OFFSET to point to the next stream's data and
1011 * continue with the sub-streams.
1012 */
1013static void
1014unflatten_byte_stream(svn_packed__byte_stream_t *stream,
1015                      svn_stringbuf_t *combined,
1016                      apr_size_t *offset)
1017{
1018  memcpy(stream->packed->data,
1019         combined->data + *offset,
1020         stream->packed->len);
1021  stream->packed->data[stream->packed->len] = '\0';
1022
1023  *offset += stream->packed->len;
1024  for (stream = stream->first_substream; stream; stream = stream->next)
1025    unflatten_byte_stream(stream, combined, offset);
1026}
1027
1028svn_error_t *
1029svn_packed__data_read(svn_packed__data_root_t **root_p,
1030                      svn_stream_t *stream,
1031                      apr_pool_t *result_pool,
1032                      apr_pool_t *scratch_pool)
1033{
1034  apr_uint64_t i;
1035  apr_uint64_t count;
1036
1037  svn_packed__int_stream_t *int_stream;
1038  svn_packed__byte_stream_t *byte_stream;
1039  svn_packed__data_root_t *root = svn_packed__data_create_root(result_pool);
1040
1041  svn_stringbuf_t *compressed
1042    = svn_stringbuf_create_ensure(1024, scratch_pool);
1043  svn_stringbuf_t *uncompressed
1044    = svn_stringbuf_create_ensure(1024, scratch_pool);
1045
1046  /* read tree structure */
1047
1048  apr_uint64_t tree_struct_size;
1049  svn_stringbuf_t *tree_struct;
1050
1051  SVN_ERR(read_stream_uint(stream, &tree_struct_size));
1052  tree_struct
1053    = svn_stringbuf_create_ensure((apr_size_t)tree_struct_size, scratch_pool);
1054  tree_struct->len = (apr_size_t)tree_struct_size;
1055
1056  SVN_ERR(svn_stream_read_full(stream, tree_struct->data, &tree_struct->len));
1057  tree_struct->data[tree_struct->len] = '\0';
1058
1059  /* reconstruct tree structure */
1060
1061  count = read_packed_uint(tree_struct);
1062  for (i = 0; i < count; ++i)
1063    read_int_stream_structure(tree_struct,
1064                              svn_packed__create_int_stream(root, FALSE,
1065                                                                 FALSE));
1066
1067  count = read_packed_uint(tree_struct);
1068  for (i = 0; i < count; ++i)
1069    read_byte_stream_structure(tree_struct,
1070                               create_bytes_stream_body(root),
1071                               root->first_int_stream);
1072
1073  /* read sub-stream data from disk, unzip it and buffer it */
1074
1075  for (int_stream = root->first_int_stream;
1076       int_stream;
1077       int_stream = ((packed_int_private_t*)int_stream->private_data)->next)
1078    {
1079      apr_size_t offset = 0;
1080      SVN_ERR(read_stream_data(stream,
1081                               packed_int_stream_length(int_stream),
1082                               uncompressed, compressed));
1083      unflatten_int_stream(int_stream, uncompressed, &offset);
1084    }
1085
1086  for (byte_stream = root->first_byte_stream;
1087       byte_stream;
1088       byte_stream = byte_stream->next)
1089    {
1090      apr_size_t offset = 0;
1091      SVN_ERR(read_stream_data(stream,
1092                               packed_byte_stream_length(byte_stream),
1093                               uncompressed, compressed));
1094      unflatten_byte_stream(byte_stream, uncompressed, &offset);
1095    }
1096
1097  *root_p = root;
1098  return SVN_NO_ERROR;
1099}
1100