1289177Speter/* packed_data.c : implement the packed binary stream data structure 2289177Speter * 3289177Speter * ==================================================================== 4289177Speter * Licensed to the Apache Software Foundation (ASF) under one 5289177Speter * or more contributor license agreements. See the NOTICE file 6289177Speter * distributed with this work for additional information 7289177Speter * regarding copyright ownership. The ASF licenses this file 8289177Speter * to you under the Apache License, Version 2.0 (the 9289177Speter * "License"); you may not use this file except in compliance 10289177Speter * with the License. You may obtain a copy of the License at 11289177Speter * 12289177Speter * http://www.apache.org/licenses/LICENSE-2.0 13289177Speter * 14289177Speter * Unless required by applicable law or agreed to in writing, 15289177Speter * software distributed under the License is distributed on an 16289177Speter * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17289177Speter * KIND, either express or implied. See the License for the 18289177Speter * specific language governing permissions and limitations 19289177Speter * under the License. 20289177Speter * ==================================================================== 21289177Speter */ 22289177Speter 23289177Speter#include <apr_tables.h> 24289177Speter 25289177Speter#include "svn_string.h" 26289177Speter#include "svn_sorts.h" 27289177Speter#include "private/svn_string_private.h" 28289177Speter#include "private/svn_subr_private.h" 29289177Speter#include "private/svn_delta_private.h" 30289177Speter#include "private/svn_packed_data.h" 31289177Speter 32289177Speter#include "svn_private_config.h" 33289177Speter 34289177Speter 35289177Speter 36289177Speter/* Private int stream data referenced by svn_packed__int_stream_t. 37289177Speter */ 38289177Spetertypedef struct packed_int_private_t 39289177Speter{ 40289177Speter /* First sub-stream, if any. NULL otherwise. */ 41289177Speter svn_packed__int_stream_t *first_substream; 42289177Speter 43289177Speter /* Last sub-stream, if any. NULL otherwise. */ 44289177Speter svn_packed__int_stream_t *last_substream; 45289177Speter 46289177Speter /* Current sub-stream to read from / write to, if any. NULL otherwise. 47289177Speter This will be initialized to FIRST_SUBSTREAM and then advanced in a 48289177Speter round-robin scheme after each number being read. */ 49289177Speter svn_packed__int_stream_t *current_substream; 50289177Speter 51289177Speter /* Number of sub-streams. */ 52289177Speter apr_size_t substream_count; 53289177Speter 54289177Speter /* Next (sibling) integer stream. If this is the last one, points to 55289177Speter the first in the list (i.e. this forms a ring list). Never NULL. */ 56289177Speter svn_packed__int_stream_t *next; 57289177Speter 58289177Speter /* 7b/8b encoded integer values (previously diff'ed and sign-handled, 59289177Speter if indicated by the flags below). The contents are disjoint from 60289177Speter the unparsed number buffer. May be NULL while not written to. */ 61289177Speter svn_stringbuf_t *packed; 62289177Speter 63289177Speter /* Initialized to 0. Latest value written to / read from PACKED. 64289177Speter Undefined if DIFF is FALSE. */ 65289177Speter apr_uint64_t last_value; 66289177Speter 67289177Speter /* Deltify data before storing it in PACKED. */ 68289177Speter svn_boolean_t diff; 69289177Speter 70289177Speter /* Numbers are likely to contain negative values with small absolutes. 71289177Speter If TRUE, store the signed bit in LSB before encoding. */ 72289177Speter svn_boolean_t is_signed; 73289177Speter 74289177Speter /* Number of integers in this stream. */ 75289177Speter apr_size_t item_count; 76289177Speter 77289177Speter /* TRUE for the last stream in a list of siblings. */ 78289177Speter svn_boolean_t is_last; 79289177Speter 80289177Speter /* Pool to use for allocations. */ 81289177Speter apr_pool_t *pool; 82289177Speter} packed_int_private_t; 83289177Speter 84289177Speter/* A byte sequence stream. Please note that NEXT is defined different 85289177Speter * from the NEXT member in integer streams. 86289177Speter */ 87289177Speterstruct svn_packed__byte_stream_t 88289177Speter{ 89289177Speter /* First sub-stream, if any. NULL otherwise. */ 90289177Speter svn_packed__byte_stream_t *first_substream; 91289177Speter 92289177Speter /* Last sub-stream, if any. NULL otherwise. */ 93289177Speter svn_packed__byte_stream_t *last_substream; 94289177Speter 95289177Speter /* Next (sibling) byte sequence stream, if any. NULL otherwise. */ 96289177Speter svn_packed__byte_stream_t *next; 97289177Speter 98289177Speter /* Stream to store the sequence lengths. */ 99289177Speter svn_packed__int_stream_t *lengths_stream; 100289177Speter 101289177Speter /* It's index (relative to its parent). */ 102289177Speter apr_size_t lengths_stream_index; 103289177Speter 104289177Speter /* Concatenated byte sequences. */ 105289177Speter svn_stringbuf_t *packed; 106289177Speter 107289177Speter /* Pool to use for allocations. */ 108289177Speter apr_pool_t *pool; 109289177Speter}; 110289177Speter 111289177Speter/* The serialization root object. It references the top-level streams. 112289177Speter */ 113289177Speterstruct svn_packed__data_root_t 114289177Speter{ 115289177Speter /* First top-level integer stream, if any. NULL otherwise. */ 116289177Speter svn_packed__int_stream_t *first_int_stream; 117289177Speter 118289177Speter /* Last top-level integer stream, if any. NULL otherwise. */ 119289177Speter svn_packed__int_stream_t *last_int_stream; 120289177Speter 121289177Speter /* Number of top-level integer streams. */ 122289177Speter apr_size_t int_stream_count; 123289177Speter 124289177Speter /* First top-level byte sequence stream, if any. NULL otherwise. */ 125289177Speter svn_packed__byte_stream_t *first_byte_stream; 126289177Speter 127289177Speter /* Last top-level byte sequence stream, if any. NULL otherwise. */ 128289177Speter svn_packed__byte_stream_t *last_byte_stream; 129289177Speter 130289177Speter /* Number of top-level byte sequence streams. */ 131289177Speter apr_size_t byte_stream_count; 132289177Speter 133289177Speter /* Pool to use for allocations. */ 134289177Speter apr_pool_t *pool; 135289177Speter}; 136289177Speter 137289177Speter/* Write access. */ 138289177Speter 139289177Spetersvn_packed__data_root_t * 140289177Spetersvn_packed__data_create_root(apr_pool_t *pool) 141289177Speter{ 142289177Speter svn_packed__data_root_t *root = apr_pcalloc(pool, sizeof(*root)); 143289177Speter root->pool = pool; 144289177Speter 145289177Speter return root; 146289177Speter} 147289177Speter 148289177Spetersvn_packed__int_stream_t * 149289177Spetersvn_packed__create_int_stream(svn_packed__data_root_t *root, 150289177Speter svn_boolean_t diff, 151289177Speter svn_boolean_t signed_ints) 152289177Speter{ 153289177Speter /* allocate and initialize the stream node */ 154289177Speter packed_int_private_t *private_data 155289177Speter = apr_pcalloc(root->pool, sizeof(*private_data)); 156289177Speter svn_packed__int_stream_t *stream 157289177Speter = apr_palloc(root->pool, sizeof(*stream)); 158289177Speter 159289177Speter private_data->diff = diff; 160289177Speter private_data->is_signed = signed_ints; 161289177Speter private_data->is_last = TRUE; 162289177Speter private_data->pool = root->pool; 163289177Speter 164289177Speter stream->buffer_used = 0; 165289177Speter stream->private_data = private_data; 166289177Speter 167289177Speter /* maintain the ring list */ 168289177Speter if (root->last_int_stream) 169289177Speter { 170289177Speter packed_int_private_t *previous_private_data 171289177Speter = root->last_int_stream->private_data; 172289177Speter previous_private_data->next = stream; 173289177Speter previous_private_data->is_last = FALSE; 174289177Speter } 175289177Speter else 176289177Speter { 177289177Speter root->first_int_stream = stream; 178289177Speter } 179289177Speter 180289177Speter root->last_int_stream = stream; 181289177Speter root->int_stream_count++; 182289177Speter 183289177Speter return stream; 184289177Speter} 185289177Speter 186289177Spetersvn_packed__int_stream_t * 187289177Spetersvn_packed__create_int_substream(svn_packed__int_stream_t *parent, 188289177Speter svn_boolean_t diff, 189289177Speter svn_boolean_t signed_ints) 190289177Speter{ 191289177Speter packed_int_private_t *parent_private = parent->private_data; 192289177Speter 193289177Speter /* allocate and initialize the stream node */ 194289177Speter packed_int_private_t *private_data 195289177Speter = apr_pcalloc(parent_private->pool, sizeof(*private_data)); 196289177Speter svn_packed__int_stream_t *stream 197289177Speter = apr_palloc(parent_private->pool, sizeof(*stream)); 198289177Speter 199289177Speter private_data->diff = diff; 200289177Speter private_data->is_signed = signed_ints; 201289177Speter private_data->is_last = TRUE; 202289177Speter private_data->pool = parent_private->pool; 203289177Speter 204289177Speter stream->buffer_used = 0; 205289177Speter stream->private_data = private_data; 206289177Speter 207289177Speter /* maintain the ring list */ 208289177Speter if (parent_private->last_substream) 209289177Speter { 210289177Speter packed_int_private_t *previous_private_data 211289177Speter = parent_private->last_substream->private_data; 212289177Speter previous_private_data->next = stream; 213289177Speter previous_private_data->is_last = FALSE; 214289177Speter } 215289177Speter else 216289177Speter { 217289177Speter parent_private->first_substream = stream; 218289177Speter parent_private->current_substream = stream; 219289177Speter } 220289177Speter 221289177Speter parent_private->last_substream = stream; 222289177Speter parent_private->substream_count++; 223289177Speter private_data->next = parent_private->first_substream; 224289177Speter 225289177Speter return stream; 226289177Speter} 227289177Speter 228289177Speter/* Returns a new top-level byte sequence stream for ROOT but does not 229289177Speter * initialize the LENGTH_STREAM member. 230289177Speter */ 231289177Speterstatic svn_packed__byte_stream_t * 232289177Spetercreate_bytes_stream_body(svn_packed__data_root_t *root) 233289177Speter{ 234289177Speter svn_packed__byte_stream_t *stream 235289177Speter = apr_pcalloc(root->pool, sizeof(*stream)); 236289177Speter 237289177Speter stream->packed = svn_stringbuf_create_empty(root->pool); 238289177Speter 239289177Speter if (root->last_byte_stream) 240289177Speter root->last_byte_stream->next = stream; 241289177Speter else 242289177Speter root->first_byte_stream = stream; 243289177Speter 244289177Speter root->last_byte_stream = stream; 245289177Speter root->byte_stream_count++; 246289177Speter 247289177Speter return stream; 248289177Speter} 249289177Speter 250289177Spetersvn_packed__byte_stream_t * 251289177Spetersvn_packed__create_bytes_stream(svn_packed__data_root_t *root) 252289177Speter{ 253289177Speter svn_packed__byte_stream_t *stream 254289177Speter = create_bytes_stream_body(root); 255289177Speter 256289177Speter stream->lengths_stream_index = root->int_stream_count; 257289177Speter stream->lengths_stream = svn_packed__create_int_stream(root, FALSE, FALSE); 258289177Speter 259289177Speter return stream; 260289177Speter} 261289177Speter 262289177Speter/* Write the 7b/8b representation of VALUE into BUFFER. BUFFER must 263289177Speter * provide at least 10 bytes space. 264289177Speter * Returns the first position behind the written data. 265289177Speter */ 266289177Speterstatic unsigned char * 267289177Speterwrite_packed_uint_body(unsigned char *buffer, apr_uint64_t value) 268289177Speter{ 269289177Speter while (value >= 0x80) 270289177Speter { 271289177Speter *(buffer++) = (unsigned char)((value % 0x80) + 0x80); 272289177Speter value /= 0x80; 273289177Speter } 274289177Speter 275289177Speter *(buffer++) = (unsigned char)value; 276289177Speter return buffer; 277289177Speter} 278289177Speter 279289177Speter/* Return remapped VALUE. 280289177Speter * 281289177Speter * Due to sign conversion and diff underflow, values close to UINT64_MAX 282289177Speter * are almost as frequent as those close to 0. Remap them such that the 283289177Speter * MSB is stored in the LSB and the remainder stores the absolute distance 284289177Speter * to 0. 285289177Speter * 286289177Speter * This minimizes the absolute value to store in many scenarios. 287289177Speter * Hence, the variable-length representation on disk is shorter, too. 288289177Speter */ 289289177Speterstatic apr_uint64_t 290289177Speterremap_uint(apr_uint64_t value) 291289177Speter{ 292289177Speter return value & APR_UINT64_C(0x8000000000000000) 293289177Speter ? APR_UINT64_MAX - (2 * value) 294289177Speter : 2 * value; 295289177Speter} 296289177Speter 297289177Speter/* Invert remap_uint. */ 298289177Speterstatic apr_uint64_t 299289177Speterunmap_uint(apr_uint64_t value) 300289177Speter{ 301289177Speter return value & 1 302289177Speter ? (APR_UINT64_MAX - value / 2) 303289177Speter : value / 2; 304289177Speter} 305289177Speter 306289177Speter/* Empty the unprocessed integer buffer in STREAM by either pushing the 307289177Speter * data to the sub-streams or writing to the packed data (in case there 308289177Speter * are no sub-streams). 309289177Speter */ 310289177Speterstatic void 311362181Sdimdata_flush_buffer(svn_packed__int_stream_t *stream) 312289177Speter{ 313289177Speter packed_int_private_t *private_data = stream->private_data; 314289177Speter apr_size_t i; 315289177Speter 316289177Speter /* if we have sub-streams, push the data down to them */ 317289177Speter if (private_data->current_substream) 318289177Speter for (i = 0; i < stream->buffer_used; ++i) 319289177Speter { 320289177Speter packed_int_private_t *current_private_data 321289177Speter = private_data->current_substream->private_data; 322289177Speter 323289177Speter svn_packed__add_uint(private_data->current_substream, 324289177Speter stream->buffer[i]); 325289177Speter private_data->current_substream = current_private_data->next; 326289177Speter } 327289177Speter else 328289177Speter { 329289177Speter /* pack the numbers into our local PACKED buffer */ 330289177Speter 331289177Speter /* temporary buffer, max 10 bytes required per 7b/8b encoded number */ 332289177Speter unsigned char local_buffer[10 * SVN__PACKED_DATA_BUFFER_SIZE]; 333289177Speter unsigned char *p = local_buffer; 334289177Speter 335289177Speter /* if configured, deltify numbers before packing them. 336289177Speter Since delta may be negative, always use the 'signed' encoding. */ 337289177Speter if (private_data->diff) 338289177Speter { 339289177Speter apr_uint64_t last_value = private_data->last_value; 340289177Speter for (i = 0; i < stream->buffer_used; ++i) 341289177Speter { 342289177Speter apr_uint64_t temp = stream->buffer[i]; 343289177Speter stream->buffer[i] = remap_uint(temp - last_value); 344289177Speter last_value = temp; 345289177Speter } 346289177Speter 347289177Speter private_data->last_value = last_value; 348289177Speter } 349289177Speter 350289177Speter /* if configured and not already done by the deltification above, 351289177Speter transform to 'signed' encoding. Store the sign in the LSB and 352289177Speter the absolute value (-1 for negative values) in the remaining 353289177Speter 63 bits. */ 354289177Speter if (!private_data->diff && private_data->is_signed) 355289177Speter for (i = 0; i < stream->buffer_used; ++i) 356289177Speter stream->buffer[i] = remap_uint(stream->buffer[i]); 357289177Speter 358289177Speter /* auto-create packed data buffer. Give it some reasonable initial 359289177Speter size - just enough for a few tens of values. */ 360289177Speter if (private_data->packed == NULL) 361289177Speter private_data->packed 362289177Speter = svn_stringbuf_create_ensure(256, private_data->pool); 363289177Speter 364289177Speter /* encode numbers into our temp buffer. */ 365289177Speter for (i = 0; i < stream->buffer_used; ++i) 366289177Speter p = write_packed_uint_body(p, stream->buffer[i]); 367289177Speter 368289177Speter /* append them to the final packed data */ 369289177Speter svn_stringbuf_appendbytes(private_data->packed, 370289177Speter (char *)local_buffer, 371289177Speter p - local_buffer); 372289177Speter } 373289177Speter 374289177Speter /* maintain counters */ 375289177Speter private_data->item_count += stream->buffer_used; 376289177Speter stream->buffer_used = 0; 377289177Speter} 378289177Speter 379289177Spetervoid 380289177Spetersvn_packed__add_uint(svn_packed__int_stream_t *stream, 381289177Speter apr_uint64_t value) 382289177Speter{ 383289177Speter stream->buffer[stream->buffer_used] = value; 384289177Speter if (++stream->buffer_used == SVN__PACKED_DATA_BUFFER_SIZE) 385362181Sdim data_flush_buffer(stream); 386289177Speter} 387289177Speter 388289177Spetervoid 389289177Spetersvn_packed__add_int(svn_packed__int_stream_t *stream, 390289177Speter apr_int64_t value) 391289177Speter{ 392289177Speter svn_packed__add_uint(stream, (apr_uint64_t)value); 393289177Speter} 394289177Speter 395289177Spetervoid 396289177Spetersvn_packed__add_bytes(svn_packed__byte_stream_t *stream, 397289177Speter const char *data, 398289177Speter apr_size_t len) 399289177Speter{ 400289177Speter svn_packed__add_uint(stream->lengths_stream, len); 401289177Speter svn_stringbuf_appendbytes(stream->packed, data, len); 402289177Speter} 403289177Speter 404289177Speter/* Append the 7b/8b encoded representation of VALUE to PACKED. 405289177Speter */ 406289177Speterstatic void 407289177Speterwrite_packed_uint(svn_stringbuf_t* packed, apr_uint64_t value) 408289177Speter{ 409289177Speter if (value < 0x80) 410289177Speter { 411289177Speter svn_stringbuf_appendbyte(packed, (char)value); 412289177Speter } 413289177Speter else 414289177Speter { 415289177Speter unsigned char buffer[10]; 416289177Speter unsigned char *p = write_packed_uint_body(buffer, value); 417289177Speter 418289177Speter svn_stringbuf_appendbytes(packed, (char *)buffer, p - buffer); 419289177Speter } 420289177Speter} 421289177Speter 422289177Speter/* Recursively write the structure (config parameters, sub-streams, data 423289177Speter * sizes) of the STREAM and all its siblings to the TREE_STRUCT buffer. 424289177Speter */ 425289177Speterstatic void 426289177Speterwrite_int_stream_structure(svn_stringbuf_t* tree_struct, 427289177Speter svn_packed__int_stream_t* stream) 428289177Speter{ 429289177Speter while (stream) 430289177Speter { 431289177Speter /* store config parameters and number of sub-streams in 1 number */ 432289177Speter packed_int_private_t *private_data = stream->private_data; 433289177Speter write_packed_uint(tree_struct, (private_data->substream_count << 2) 434289177Speter + (private_data->diff ? 1 : 0) 435289177Speter + (private_data->is_signed ? 2 : 0)); 436289177Speter 437289177Speter /* store item count and length their of packed representation */ 438362181Sdim data_flush_buffer(stream); 439289177Speter 440289177Speter write_packed_uint(tree_struct, private_data->item_count); 441289177Speter write_packed_uint(tree_struct, private_data->packed 442289177Speter ? private_data->packed->len 443289177Speter : 0); 444289177Speter 445289177Speter /* append all sub-stream structures */ 446289177Speter write_int_stream_structure(tree_struct, private_data->first_substream); 447289177Speter 448289177Speter /* continue with next sibling */ 449289177Speter stream = private_data->is_last ? NULL : private_data->next; 450289177Speter } 451289177Speter} 452289177Speter 453289177Speter/* Recursively write the structure (sub-streams, data sizes) of the STREAM 454289177Speter * and all its siblings to the TREE_STRUCT buffer. 455289177Speter */ 456289177Speterstatic void 457289177Speterwrite_byte_stream_structure(svn_stringbuf_t* tree_struct, 458289177Speter svn_packed__byte_stream_t* stream) 459289177Speter{ 460289177Speter /* for this and all siblings */ 461289177Speter for (; stream; stream = stream->next) 462289177Speter { 463289177Speter /* this stream's structure and size */ 464289177Speter write_packed_uint(tree_struct, 0); 465289177Speter write_packed_uint(tree_struct, stream->lengths_stream_index); 466289177Speter write_packed_uint(tree_struct, stream->packed->len); 467289177Speter 468289177Speter /* followed by all its sub-streams */ 469289177Speter write_byte_stream_structure(tree_struct, stream->first_substream); 470289177Speter } 471289177Speter} 472289177Speter 473289177Speter/* Write the 7b/8b encoded representation of VALUE to STREAM. 474289177Speter */ 475289177Speterstatic svn_error_t * 476289177Speterwrite_stream_uint(svn_stream_t *stream, 477289177Speter apr_uint64_t value) 478289177Speter{ 479289177Speter unsigned char buffer[10]; 480289177Speter apr_size_t count = write_packed_uint_body(buffer, value) - buffer; 481289177Speter 482289177Speter SVN_ERR(svn_stream_write(stream, (char *)buffer, &count)); 483289177Speter 484289177Speter return SVN_NO_ERROR; 485289177Speter} 486289177Speter 487289177Speter/* Return the total size of all packed data in STREAM, its siblings and 488289177Speter * all sub-streams. To get an accurate value, flush all buffers prior to 489289177Speter * calling this function. 490289177Speter */ 491289177Speterstatic apr_size_t 492289177Speterpacked_int_stream_length(svn_packed__int_stream_t *stream) 493289177Speter{ 494289177Speter packed_int_private_t *private_data = stream->private_data; 495289177Speter apr_size_t result = private_data->packed ? private_data->packed->len : 0; 496289177Speter 497289177Speter stream = private_data->first_substream; 498289177Speter while (stream) 499289177Speter { 500289177Speter private_data = stream->private_data; 501289177Speter result += packed_int_stream_length(stream); 502289177Speter stream = private_data->is_last ? NULL : private_data->next; 503289177Speter } 504289177Speter 505289177Speter return result; 506289177Speter} 507289177Speter 508289177Speter/* Return the total size of all byte sequences data in STREAM, its siblings 509289177Speter * and all sub-streams. 510289177Speter */ 511289177Speterstatic apr_size_t 512289177Speterpacked_byte_stream_length(svn_packed__byte_stream_t *stream) 513289177Speter{ 514289177Speter apr_size_t result = stream->packed->len; 515289177Speter 516289177Speter for (stream = stream->first_substream; stream; stream = stream->next) 517289177Speter result += packed_byte_stream_length(stream); 518289177Speter 519289177Speter return result; 520289177Speter} 521289177Speter 522289177Speter/* Append all packed data in STREAM, its siblings and all sub-streams to 523289177Speter * COMBINED. 524289177Speter */ 525289177Speterstatic void 526289177Speterappend_int_stream(svn_packed__int_stream_t *stream, 527289177Speter svn_stringbuf_t *combined) 528289177Speter{ 529289177Speter packed_int_private_t *private_data = stream->private_data; 530289177Speter if (private_data->packed) 531289177Speter svn_stringbuf_appendstr(combined, private_data->packed); 532289177Speter 533289177Speter stream = private_data->first_substream; 534289177Speter while (stream) 535289177Speter { 536289177Speter private_data = stream->private_data; 537289177Speter append_int_stream(stream, combined); 538289177Speter stream = private_data->is_last ? NULL : private_data->next; 539289177Speter } 540289177Speter} 541289177Speter 542289177Speter/* Append all byte sequences in STREAM, its siblings and all sub-streams 543289177Speter * to COMBINED. 544289177Speter */ 545289177Speterstatic void 546289177Speterappend_byte_stream(svn_packed__byte_stream_t *stream, 547289177Speter svn_stringbuf_t *combined) 548289177Speter{ 549289177Speter svn_stringbuf_appendstr(combined, stream->packed); 550289177Speter 551289177Speter for (stream = stream->first_substream; stream; stream = stream->next) 552289177Speter append_byte_stream(stream, combined); 553289177Speter} 554289177Speter 555289177Speter/* Take the binary data in UNCOMPRESSED, zip it into COMPRESSED and write 556289177Speter * it to STREAM. COMPRESSED simply acts as a re-usable memory buffer. 557289177Speter * Clear all buffers (COMPRESSED, UNCOMPRESSED) at the end of the function. 558289177Speter */ 559289177Speterstatic svn_error_t * 560289177Speterwrite_stream_data(svn_stream_t *stream, 561289177Speter svn_stringbuf_t *uncompressed, 562289177Speter svn_stringbuf_t *compressed) 563289177Speter{ 564362181Sdim SVN_ERR(svn__compress_zlib(uncompressed->data, uncompressed->len, 565362181Sdim compressed, 566362181Sdim SVN_DELTA_COMPRESSION_LEVEL_DEFAULT)); 567289177Speter 568289177Speter SVN_ERR(write_stream_uint(stream, compressed->len)); 569289177Speter SVN_ERR(svn_stream_write(stream, compressed->data, &compressed->len)); 570289177Speter 571289177Speter svn_stringbuf_setempty(uncompressed); 572289177Speter svn_stringbuf_setempty(compressed); 573289177Speter 574289177Speter return SVN_NO_ERROR; 575289177Speter} 576289177Speter 577289177Spetersvn_error_t * 578289177Spetersvn_packed__data_write(svn_stream_t *stream, 579289177Speter svn_packed__data_root_t *root, 580289177Speter apr_pool_t *scratch_pool) 581289177Speter{ 582289177Speter svn_packed__int_stream_t *int_stream; 583289177Speter svn_packed__byte_stream_t *byte_stream; 584289177Speter 585289177Speter /* re-usable data buffers */ 586289177Speter svn_stringbuf_t *compressed 587289177Speter = svn_stringbuf_create_ensure(1024, scratch_pool); 588289177Speter svn_stringbuf_t *uncompressed 589289177Speter = svn_stringbuf_create_ensure(1024, scratch_pool); 590289177Speter 591289177Speter /* write tree structure */ 592289177Speter svn_stringbuf_t *tree_struct 593289177Speter = svn_stringbuf_create_ensure(127, scratch_pool); 594289177Speter 595289177Speter write_packed_uint(tree_struct, root->int_stream_count); 596289177Speter write_int_stream_structure(tree_struct, root->first_int_stream); 597289177Speter 598289177Speter write_packed_uint(tree_struct, root->byte_stream_count); 599289177Speter write_byte_stream_structure(tree_struct, root->first_byte_stream); 600289177Speter 601289177Speter SVN_ERR(write_stream_uint(stream, tree_struct->len)); 602289177Speter SVN_ERR(svn_stream_write(stream, tree_struct->data, &tree_struct->len)); 603289177Speter 604289177Speter /* flatten sub-streams, zip them and write them to disk */ 605289177Speter 606289177Speter for (int_stream = root->first_int_stream; 607289177Speter int_stream; 608289177Speter int_stream = ((packed_int_private_t*)int_stream->private_data)->next) 609289177Speter { 610289177Speter apr_size_t len = packed_int_stream_length(int_stream); 611289177Speter svn_stringbuf_ensure(uncompressed, len); 612289177Speter 613289177Speter append_int_stream(int_stream, uncompressed); 614289177Speter SVN_ERR(write_stream_data(stream, uncompressed, compressed)); 615289177Speter } 616289177Speter 617289177Speter for (byte_stream = root->first_byte_stream; 618289177Speter byte_stream; 619289177Speter byte_stream = byte_stream->next) 620289177Speter { 621289177Speter apr_size_t len = packed_byte_stream_length(byte_stream); 622289177Speter svn_stringbuf_ensure(uncompressed, len); 623289177Speter 624289177Speter append_byte_stream(byte_stream, uncompressed); 625289177Speter SVN_ERR(write_stream_data(stream, uncompressed, compressed)); 626289177Speter } 627289177Speter 628289177Speter return SVN_NO_ERROR; 629289177Speter} 630289177Speter 631289177Speter 632289177Speter/* Read access. */ 633289177Speter 634289177Spetersvn_packed__int_stream_t * 635289177Spetersvn_packed__first_int_stream(svn_packed__data_root_t *root) 636289177Speter{ 637289177Speter return root->first_int_stream; 638289177Speter} 639289177Speter 640289177Spetersvn_packed__byte_stream_t * 641289177Spetersvn_packed__first_byte_stream(svn_packed__data_root_t *root) 642289177Speter{ 643289177Speter return root->first_byte_stream; 644289177Speter} 645289177Speter 646289177Spetersvn_packed__int_stream_t * 647289177Spetersvn_packed__next_int_stream(svn_packed__int_stream_t *stream) 648289177Speter{ 649289177Speter packed_int_private_t *private_data = stream->private_data; 650289177Speter return private_data->is_last ? NULL : private_data->next; 651289177Speter} 652289177Speter 653289177Spetersvn_packed__byte_stream_t * 654289177Spetersvn_packed__next_byte_stream(svn_packed__byte_stream_t *stream) 655289177Speter{ 656289177Speter return stream->next; 657289177Speter} 658289177Speter 659289177Spetersvn_packed__int_stream_t * 660289177Spetersvn_packed__first_int_substream(svn_packed__int_stream_t *stream) 661289177Speter{ 662289177Speter packed_int_private_t *private_data = stream->private_data; 663289177Speter return private_data->first_substream; 664289177Speter} 665289177Speter 666289177Speterapr_size_t 667289177Spetersvn_packed__int_count(svn_packed__int_stream_t *stream) 668289177Speter{ 669289177Speter packed_int_private_t *private_data = stream->private_data; 670289177Speter return private_data->item_count + stream->buffer_used; 671289177Speter} 672289177Speter 673289177Speterapr_size_t 674289177Spetersvn_packed__byte_count(svn_packed__byte_stream_t *stream) 675289177Speter{ 676289177Speter return stream->packed->len; 677289177Speter} 678289177Speter 679362181Sdimapr_size_t 680362181Sdimsvn_packed__byte_block_count(svn_packed__byte_stream_t *stream) 681362181Sdim{ 682362181Sdim return svn_packed__int_count(stream->lengths_stream); 683362181Sdim} 684362181Sdim 685289177Speter/* Read one 7b/8b encoded value from *P and return it in *RESULT. Returns 686289177Speter * the first position after the parsed data. 687289177Speter * 688289177Speter * Overflows will be detected in the sense that it will end parsing the 689289177Speter * input but the result is undefined. 690289177Speter */ 691289177Speterstatic unsigned char * 692289177Speterread_packed_uint_body(unsigned char *p, apr_uint64_t *result) 693289177Speter{ 694289177Speter if (*p < 0x80) 695289177Speter { 696289177Speter *result = *p; 697289177Speter } 698289177Speter else 699289177Speter { 700289177Speter apr_uint64_t shift = 0; 701289177Speter apr_uint64_t value = 0; 702289177Speter while (*p >= 0x80) 703289177Speter { 704289177Speter value += (apr_uint64_t)(*p & 0x7f) << shift; 705289177Speter ++p; 706289177Speter 707289177Speter shift += 7; 708289177Speter if (shift > 64) 709289177Speter { 710289177Speter /* a definite overflow. Note, that numbers of 65 .. 70 711289177Speter bits will not be detected as an overflow as they don't 712289177Speter threaten to exceed the input buffer. */ 713289177Speter *result = 0; 714289177Speter return p; 715289177Speter } 716289177Speter } 717289177Speter 718289177Speter *result = value + ((apr_uint64_t)*p << shift); 719289177Speter } 720289177Speter 721289177Speter return ++p; 722289177Speter} 723289177Speter 724289177Speter/* Read one 7b/8b encoded value from STREAM and return it in *RESULT. 725289177Speter * 726289177Speter * Overflows will be detected in the sense that it will end parsing the 727289177Speter * input but the result is undefined. 728289177Speter */ 729289177Speterstatic svn_error_t * 730289177Speterread_stream_uint(svn_stream_t *stream, apr_uint64_t *result) 731289177Speter{ 732289177Speter apr_uint64_t shift = 0; 733289177Speter apr_uint64_t value = 0; 734289177Speter unsigned char c; 735289177Speter 736289177Speter do 737289177Speter { 738289177Speter apr_size_t len = 1; 739289177Speter SVN_ERR(svn_stream_read_full(stream, (char *)&c, &len)); 740289177Speter if (len != 1) 741289177Speter return svn_error_create(SVN_ERR_CORRUPT_PACKED_DATA, NULL, 742289177Speter _("Unexpected end of stream")); 743289177Speter 744289177Speter value += (apr_uint64_t)(c & 0x7f) << shift; 745289177Speter shift += 7; 746289177Speter if (shift > 64) 747289177Speter return svn_error_create(SVN_ERR_CORRUPT_PACKED_DATA, NULL, 748289177Speter _("Integer representation too long")); 749289177Speter } 750289177Speter while (c >= 0x80); 751289177Speter 752289177Speter *result = value; 753289177Speter return SVN_NO_ERROR; 754289177Speter} 755289177Speter 756289177Speter/* Extract and return the next integer from PACKED and make PACKED point 757289177Speter * to the next integer. 758289177Speter */ 759289177Speterstatic apr_uint64_t 760289177Speterread_packed_uint(svn_stringbuf_t *packed) 761289177Speter{ 762289177Speter apr_uint64_t result = 0; 763289177Speter unsigned char *p = (unsigned char *)packed->data; 764289177Speter apr_size_t read = read_packed_uint_body(p, &result) - p; 765289177Speter 766289177Speter if (read > packed->len) 767289177Speter read = packed->len; 768289177Speter 769289177Speter packed->data += read; 770289177Speter packed->blocksize -= read; 771289177Speter packed->len -= read; 772289177Speter 773289177Speter return result; 774289177Speter} 775289177Speter 776289177Speter/* Ensure that STREAM contains at least one item in its buffer. 777289177Speter */ 778289177Speterstatic void 779289177Spetersvn_packed__data_fill_buffer(svn_packed__int_stream_t *stream) 780289177Speter{ 781289177Speter packed_int_private_t *private_data = stream->private_data; 782289177Speter apr_size_t i; 783289177Speter apr_size_t end = MIN(SVN__PACKED_DATA_BUFFER_SIZE, 784289177Speter private_data->item_count); 785289177Speter 786289177Speter /* in case, some user calls us explicitly without a good reason ... */ 787289177Speter if (stream->buffer_used) 788289177Speter return; 789289177Speter 790289177Speter /* can we get data from the sub-streams or do we have to decode it from 791289177Speter our local packed container? */ 792289177Speter if (private_data->current_substream) 793289177Speter for (i = end; i > 0; --i) 794289177Speter { 795289177Speter packed_int_private_t *current_private_data 796289177Speter = private_data->current_substream->private_data; 797289177Speter stream->buffer[i-1] 798289177Speter = svn_packed__get_uint(private_data->current_substream); 799289177Speter private_data->current_substream = current_private_data->next; 800289177Speter } 801289177Speter else 802289177Speter { 803289177Speter /* use this local buffer only if the packed data is shorter than this. 804289177Speter The goal is that read_packed_uint_body doesn't need check for 805289177Speter overflows. */ 806289177Speter unsigned char local_buffer[10 * SVN__PACKED_DATA_BUFFER_SIZE]; 807289177Speter unsigned char *p; 808289177Speter unsigned char *start; 809289177Speter apr_size_t packed_read; 810289177Speter 811289177Speter if (private_data->packed->len < sizeof(local_buffer)) 812289177Speter { 813289177Speter apr_size_t trail = sizeof(local_buffer) - private_data->packed->len; 814289177Speter memcpy(local_buffer, 815289177Speter private_data->packed->data, 816289177Speter private_data->packed->len); 817289177Speter memset(local_buffer + private_data->packed->len, 0, MIN(trail, end)); 818289177Speter 819289177Speter p = local_buffer; 820289177Speter } 821289177Speter else 822289177Speter p = (unsigned char *)private_data->packed->data; 823289177Speter 824289177Speter /* unpack numbers */ 825289177Speter start = p; 826289177Speter for (i = end; i > 0; --i) 827289177Speter p = read_packed_uint_body(p, &stream->buffer[i-1]); 828289177Speter 829289177Speter /* adjust remaining packed data buffer */ 830289177Speter packed_read = p - start; 831289177Speter private_data->packed->data += packed_read; 832289177Speter private_data->packed->len -= packed_read; 833289177Speter private_data->packed->blocksize -= packed_read; 834289177Speter 835289177Speter /* undeltify numbers, if configured */ 836289177Speter if (private_data->diff) 837289177Speter { 838289177Speter apr_uint64_t last_value = private_data->last_value; 839289177Speter for (i = end; i > 0; --i) 840289177Speter { 841289177Speter last_value += unmap_uint(stream->buffer[i-1]); 842289177Speter stream->buffer[i-1] = last_value; 843289177Speter } 844289177Speter 845289177Speter private_data->last_value = last_value; 846289177Speter } 847289177Speter 848289177Speter /* handle signed values, if configured and not handled already */ 849289177Speter if (!private_data->diff && private_data->is_signed) 850289177Speter for (i = 0; i < end; ++i) 851289177Speter stream->buffer[i] = unmap_uint(stream->buffer[i]); 852289177Speter } 853289177Speter 854289177Speter stream->buffer_used = end; 855289177Speter private_data->item_count -= end; 856289177Speter} 857289177Speter 858289177Speterapr_uint64_t 859289177Spetersvn_packed__get_uint(svn_packed__int_stream_t *stream) 860289177Speter{ 861289177Speter if (stream->buffer_used == 0) 862289177Speter svn_packed__data_fill_buffer(stream); 863289177Speter 864289177Speter return stream->buffer_used ? stream->buffer[--stream->buffer_used] : 0; 865289177Speter} 866289177Speter 867289177Speterapr_int64_t 868289177Spetersvn_packed__get_int(svn_packed__int_stream_t *stream) 869289177Speter{ 870289177Speter return (apr_int64_t)svn_packed__get_uint(stream); 871289177Speter} 872289177Speter 873289177Speterconst char * 874289177Spetersvn_packed__get_bytes(svn_packed__byte_stream_t *stream, 875289177Speter apr_size_t *len) 876289177Speter{ 877289177Speter const char *result = stream->packed->data; 878289177Speter apr_size_t count = (apr_size_t)svn_packed__get_uint(stream->lengths_stream); 879289177Speter 880289177Speter if (count > stream->packed->len) 881289177Speter count = stream->packed->len; 882289177Speter 883289177Speter /* advance packed buffer */ 884289177Speter stream->packed->data += count; 885289177Speter stream->packed->len -= count; 886289177Speter stream->packed->blocksize -= count; 887289177Speter 888289177Speter *len = count; 889289177Speter return result; 890289177Speter} 891289177Speter 892289177Speter/* Read the integer stream structure and recreate it in STREAM, including 893289177Speter * sub-streams, from TREE_STRUCT. 894289177Speter */ 895289177Speterstatic void 896289177Speterread_int_stream_structure(svn_stringbuf_t *tree_struct, 897289177Speter svn_packed__int_stream_t *stream) 898289177Speter{ 899289177Speter packed_int_private_t *private_data = stream->private_data; 900289177Speter apr_uint64_t value = read_packed_uint(tree_struct); 901289177Speter apr_size_t substream_count; 902289177Speter apr_size_t i; 903289177Speter 904289177Speter /* extract local parameters */ 905289177Speter private_data->diff = (value & 1) != 0; 906289177Speter private_data->is_signed = (value & 2) != 0; 907289177Speter substream_count = (apr_size_t)(value >> 2); 908289177Speter 909289177Speter /* read item count & packed size; allocate packed data buffer */ 910289177Speter private_data->item_count = (apr_size_t)read_packed_uint(tree_struct); 911289177Speter value = read_packed_uint(tree_struct); 912289177Speter if (value) 913289177Speter { 914289177Speter private_data->packed = svn_stringbuf_create_ensure((apr_size_t)value, 915289177Speter private_data->pool); 916289177Speter private_data->packed->len = (apr_size_t)value; 917289177Speter } 918289177Speter 919289177Speter /* add sub-streams and read their config, too */ 920289177Speter for (i = 0; i < substream_count; ++i) 921289177Speter read_int_stream_structure(tree_struct, 922289177Speter svn_packed__create_int_substream(stream, 923289177Speter FALSE, 924289177Speter FALSE)); 925289177Speter} 926289177Speter 927289177Speter/* Read the integer stream structure and recreate it in STREAM, including 928289177Speter * sub-streams, from TREE_STRUCT. FIRST_INT_STREAM is the integer stream 929289177Speter * that would correspond to lengths_stream_index 0. 930289177Speter */ 931289177Speterstatic void 932289177Speterread_byte_stream_structure(svn_stringbuf_t *tree_struct, 933289177Speter svn_packed__byte_stream_t *stream, 934289177Speter svn_packed__int_stream_t *first_int_stream) 935289177Speter{ 936289177Speter apr_size_t lengths_stream_index; 937289177Speter apr_size_t packed_size; 938289177Speter apr_size_t i; 939289177Speter 940289177Speter /* read parameters from the TREE_STRUCT buffer */ 941289177Speter (void) (apr_size_t)read_packed_uint(tree_struct); /* discard first uint */ 942289177Speter lengths_stream_index = (apr_size_t)read_packed_uint(tree_struct); 943289177Speter packed_size = (apr_size_t)read_packed_uint(tree_struct); 944289177Speter 945289177Speter /* allocate byte sequence buffer size */ 946289177Speter svn_stringbuf_ensure(stream->packed, packed_size); 947289177Speter stream->packed->len = packed_size; 948289177Speter 949289177Speter /* navigate to the (already existing) lengths_stream */ 950289177Speter stream->lengths_stream_index = lengths_stream_index; 951289177Speter stream->lengths_stream = first_int_stream; 952289177Speter for (i = 0; i < lengths_stream_index; ++i) 953289177Speter { 954289177Speter packed_int_private_t *length_private 955289177Speter = stream->lengths_stream->private_data; 956289177Speter stream->lengths_stream = length_private->next; 957289177Speter } 958289177Speter} 959289177Speter 960289177Speter/* Read a compressed block from STREAM and uncompress it into UNCOMPRESSED. 961289177Speter * UNCOMPRESSED_LEN is the expected size of the stream. COMPRESSED is a 962289177Speter * re-used buffer for temporary data. 963289177Speter */ 964289177Speterstatic svn_error_t * 965289177Speterread_stream_data(svn_stream_t *stream, 966289177Speter apr_size_t uncompressed_len, 967289177Speter svn_stringbuf_t *uncompressed, 968289177Speter svn_stringbuf_t *compressed) 969289177Speter{ 970289177Speter apr_uint64_t len; 971289177Speter apr_size_t compressed_len; 972289177Speter 973289177Speter SVN_ERR(read_stream_uint(stream, &len)); 974289177Speter compressed_len = (apr_size_t)len; 975289177Speter 976289177Speter svn_stringbuf_ensure(compressed, compressed_len); 977289177Speter compressed->len = compressed_len; 978289177Speter SVN_ERR(svn_stream_read_full(stream, compressed->data, &compressed->len)); 979289177Speter compressed->data[compressed_len] = '\0'; 980289177Speter 981362181Sdim SVN_ERR(svn__decompress_zlib(compressed->data, compressed->len, 982362181Sdim uncompressed, uncompressed_len)); 983289177Speter 984289177Speter return SVN_NO_ERROR; 985289177Speter} 986289177Speter 987289177Speter/* Read the packed contents from COMBINED, starting at *OFFSET and store 988289177Speter * it in STREAM. Update *OFFSET to point to the next stream's data and 989289177Speter * continue with the sub-streams. 990289177Speter */ 991289177Speterstatic void 992289177Speterunflatten_int_stream(svn_packed__int_stream_t *stream, 993289177Speter svn_stringbuf_t *combined, 994289177Speter apr_size_t *offset) 995289177Speter{ 996289177Speter packed_int_private_t *private_data = stream->private_data; 997289177Speter if (private_data->packed) 998289177Speter { 999289177Speter memcpy(private_data->packed->data, 1000289177Speter combined->data + *offset, 1001289177Speter private_data->packed->len); 1002289177Speter 1003289177Speter private_data->packed->data[private_data->packed->len] = '\0'; 1004289177Speter *offset += private_data->packed->len; 1005289177Speter } 1006289177Speter 1007289177Speter stream = private_data->first_substream; 1008289177Speter while (stream) 1009289177Speter { 1010289177Speter private_data = stream->private_data; 1011289177Speter unflatten_int_stream(stream, combined, offset); 1012289177Speter stream = private_data->is_last ? NULL : private_data->next; 1013289177Speter } 1014289177Speter} 1015289177Speter 1016289177Speter/* Read the packed contents from COMBINED, starting at *OFFSET and store 1017289177Speter * it in STREAM. Update *OFFSET to point to the next stream's data and 1018289177Speter * continue with the sub-streams. 1019289177Speter */ 1020289177Speterstatic void 1021289177Speterunflatten_byte_stream(svn_packed__byte_stream_t *stream, 1022289177Speter svn_stringbuf_t *combined, 1023289177Speter apr_size_t *offset) 1024289177Speter{ 1025289177Speter memcpy(stream->packed->data, 1026289177Speter combined->data + *offset, 1027289177Speter stream->packed->len); 1028289177Speter stream->packed->data[stream->packed->len] = '\0'; 1029289177Speter 1030289177Speter *offset += stream->packed->len; 1031289177Speter for (stream = stream->first_substream; stream; stream = stream->next) 1032289177Speter unflatten_byte_stream(stream, combined, offset); 1033289177Speter} 1034289177Speter 1035289177Spetersvn_error_t * 1036289177Spetersvn_packed__data_read(svn_packed__data_root_t **root_p, 1037289177Speter svn_stream_t *stream, 1038289177Speter apr_pool_t *result_pool, 1039289177Speter apr_pool_t *scratch_pool) 1040289177Speter{ 1041289177Speter apr_uint64_t i; 1042289177Speter apr_uint64_t count; 1043289177Speter 1044289177Speter svn_packed__int_stream_t *int_stream; 1045289177Speter svn_packed__byte_stream_t *byte_stream; 1046289177Speter svn_packed__data_root_t *root = svn_packed__data_create_root(result_pool); 1047289177Speter 1048289177Speter svn_stringbuf_t *compressed 1049289177Speter = svn_stringbuf_create_ensure(1024, scratch_pool); 1050289177Speter svn_stringbuf_t *uncompressed 1051289177Speter = svn_stringbuf_create_ensure(1024, scratch_pool); 1052289177Speter 1053289177Speter /* read tree structure */ 1054289177Speter 1055289177Speter apr_uint64_t tree_struct_size; 1056289177Speter svn_stringbuf_t *tree_struct; 1057289177Speter 1058289177Speter SVN_ERR(read_stream_uint(stream, &tree_struct_size)); 1059289177Speter tree_struct 1060289177Speter = svn_stringbuf_create_ensure((apr_size_t)tree_struct_size, scratch_pool); 1061289177Speter tree_struct->len = (apr_size_t)tree_struct_size; 1062289177Speter 1063289177Speter SVN_ERR(svn_stream_read_full(stream, tree_struct->data, &tree_struct->len)); 1064289177Speter tree_struct->data[tree_struct->len] = '\0'; 1065289177Speter 1066289177Speter /* reconstruct tree structure */ 1067289177Speter 1068289177Speter count = read_packed_uint(tree_struct); 1069289177Speter for (i = 0; i < count; ++i) 1070289177Speter read_int_stream_structure(tree_struct, 1071289177Speter svn_packed__create_int_stream(root, FALSE, 1072289177Speter FALSE)); 1073289177Speter 1074289177Speter count = read_packed_uint(tree_struct); 1075289177Speter for (i = 0; i < count; ++i) 1076289177Speter read_byte_stream_structure(tree_struct, 1077289177Speter create_bytes_stream_body(root), 1078289177Speter root->first_int_stream); 1079289177Speter 1080289177Speter /* read sub-stream data from disk, unzip it and buffer it */ 1081289177Speter 1082289177Speter for (int_stream = root->first_int_stream; 1083289177Speter int_stream; 1084289177Speter int_stream = ((packed_int_private_t*)int_stream->private_data)->next) 1085289177Speter { 1086289177Speter apr_size_t offset = 0; 1087289177Speter SVN_ERR(read_stream_data(stream, 1088289177Speter packed_int_stream_length(int_stream), 1089289177Speter uncompressed, compressed)); 1090289177Speter unflatten_int_stream(int_stream, uncompressed, &offset); 1091289177Speter } 1092289177Speter 1093289177Speter for (byte_stream = root->first_byte_stream; 1094289177Speter byte_stream; 1095289177Speter byte_stream = byte_stream->next) 1096289177Speter { 1097289177Speter apr_size_t offset = 0; 1098289177Speter SVN_ERR(read_stream_data(stream, 1099289177Speter packed_byte_stream_length(byte_stream), 1100289177Speter uncompressed, compressed)); 1101289177Speter unflatten_byte_stream(byte_stream, uncompressed, &offset); 1102289177Speter } 1103289177Speter 1104289177Speter *root_p = root; 1105289177Speter return SVN_NO_ERROR; 1106289177Speter} 1107