1289177Speter/* reps.c --- FSX representation container 2289177Speter * 3289177Speter * ==================================================================== 4289177Speter * Licensed to the Apache Software Foundation (ASF) under one 5289177Speter * or more contributor license agreements. See the NOTICE file 6289177Speter * distributed with this work for additional information 7289177Speter * regarding copyright ownership. The ASF licenses this file 8289177Speter * to you under the Apache License, Version 2.0 (the 9289177Speter * "License"); you may not use this file except in compliance 10289177Speter * with the License. You may obtain a copy of the License at 11289177Speter * 12289177Speter * http://www.apache.org/licenses/LICENSE-2.0 13289177Speter * 14289177Speter * Unless required by applicable law or agreed to in writing, 15289177Speter * software distributed under the License is distributed on an 16289177Speter * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17289177Speter * KIND, either express or implied. See the License for the 18289177Speter * specific language governing permissions and limitations 19289177Speter * under the License. 20289177Speter * ==================================================================== 21289177Speter */ 22289177Speter 23289177Speter#include "reps.h" 24289177Speter 25289177Speter#include "svn_sorts.h" 26289177Speter#include "private/svn_string_private.h" 27289177Speter#include "private/svn_packed_data.h" 28289177Speter#include "private/svn_temp_serializer.h" 29289177Speter 30289177Speter#include "svn_private_config.h" 31289177Speter 32289177Speter#include "cached_data.h" 33289177Speter 34289177Speter/* Length of the text chunks we hash and match. The algorithm will find 35289177Speter * most matches with a length of 2 * MATCH_BLOCKSIZE and only specific 36289177Speter * ones that are shorter than MATCH_BLOCKSIZE. 37289177Speter * 38289177Speter * This should be a power of two and must be a multiple of 8. 39289177Speter * Good choices are 32, 64 and 128. 40289177Speter */ 41289177Speter#define MATCH_BLOCKSIZE 64 42289177Speter 43289177Speter/* Limit the total text body within a container to 16MB. Larger values 44289177Speter * of up to 2GB are possible but become increasingly impractical as the 45289177Speter * container has to be loaded in its entirety before any of it can be read. 46289177Speter */ 47289177Speter#define MAX_TEXT_BODY 0x1000000 48289177Speter 49289177Speter/* Limit the size of the instructions stream. This should not exceed the 50289177Speter * text body size limit. */ 51289177Speter#define MAX_INSTRUCTIONS (MAX_TEXT_BODY / 8) 52289177Speter 53289177Speter/* value of unused hash buckets */ 54289177Speter#define NO_OFFSET ((apr_uint32_t)(-1)) 55289177Speter 56289177Speter/* Byte strings are described by a series of copy instructions that each 57289177Speter * do one of the following 58289177Speter * 59289177Speter * - copy a given number of bytes from the text corpus starting at a 60289177Speter * given offset 61289177Speter * - reference other instruction and specify how many of instructions of 62289177Speter * that sequence shall be executed (i.e. a sub-sequence) 63289177Speter * - copy a number of bytes from the base representation buffer starting 64289177Speter * at a given offset 65289177Speter */ 66289177Speter 67289177Speter/* The contents of a fulltext / representation is defined by its first 68289177Speter * instruction and the number of instructions to execute. 69289177Speter */ 70289177Spetertypedef struct rep_t 71289177Speter{ 72289177Speter apr_uint32_t first_instruction; 73289177Speter apr_uint32_t instruction_count; 74289177Speter} rep_t; 75289177Speter 76289177Speter/* A single instruction. The instruction type is being encoded in OFFSET. 77289177Speter */ 78289177Spetertypedef struct instruction_t 79289177Speter{ 80289177Speter /* Instruction type and offset. 81289177Speter * - offset < 0 82289177Speter * reference to instruction sub-sequence starting with 83289177Speter * container->instructions[-offset]. 84289177Speter * - 0 <= offset < container->base_text_len 85289177Speter * reference to the base text corpus; 86289177Speter * start copy at offset 87289177Speter * - offset >= container->base_text_len 88289177Speter * reference to the text corpus; 89289177Speter * start copy at offset-container->base_text_len 90289177Speter */ 91289177Speter apr_int32_t offset; 92289177Speter 93289177Speter /* Number of bytes to copy / instructions to execute 94289177Speter */ 95289177Speter apr_uint32_t count; 96289177Speter} instruction_t; 97289177Speter 98289177Speter/* Describe a base fulltext. 99289177Speter */ 100289177Spetertypedef struct base_t 101289177Speter{ 102289177Speter /* Revision */ 103289177Speter svn_revnum_t revision; 104289177Speter 105289177Speter /* Item within that revision */ 106289177Speter apr_uint64_t item_index; 107289177Speter 108289177Speter /* Priority with which to use this base over others */ 109289177Speter int priority; 110289177Speter 111289177Speter /* Index into builder->representations that identifies the copy 112289177Speter * instructions for this base. */ 113289177Speter apr_uint32_t rep; 114289177Speter} base_t; 115289177Speter 116289177Speter/* Yet another hash data structure. This one tries to be more cache 117289177Speter * friendly by putting the first byte of each hashed sequence in a 118289177Speter * common array. This array will often fit into L1 or L2 at least and 119289177Speter * give a 99% accurate test for a match without giving false negatives. 120289177Speter */ 121289177Spetertypedef struct hash_t 122289177Speter{ 123289177Speter /* for used entries i, prefixes[i] == text[offsets[i]]; 0 otherwise. 124289177Speter * This allows for a quick check without resolving the double 125289177Speter * indirection. */ 126289177Speter char *prefixes; 127289177Speter 128289177Speter /* for used entries i, offsets[i] is start offset in the text corpus; 129289177Speter * NO_OFFSET otherwise. 130289177Speter */ 131289177Speter apr_uint32_t *offsets; 132289177Speter 133289177Speter /* to be used later for optimizations. */ 134289177Speter apr_uint32_t *last_matches; 135289177Speter 136289177Speter /* number of buckets in this hash, i.e. elements in each array above. 137289177Speter * Must be 1 << (8 * sizeof(hash_key_t) - shift) */ 138289177Speter apr_size_t size; 139289177Speter 140289177Speter /* number of buckets actually in use. Must be <= size. */ 141289177Speter apr_size_t used; 142289177Speter 143289177Speter /* number of bits to shift right to map a hash_key_t to a bucket index */ 144289177Speter apr_size_t shift; 145289177Speter 146289177Speter /* pool to use when growing the hash */ 147289177Speter apr_pool_t *pool; 148289177Speter} hash_t; 149289177Speter 150289177Speter/* Hash key type. 32 bits for pseudo-Adler32 hash sums. 151289177Speter */ 152289177Spetertypedef apr_uint32_t hash_key_t; 153289177Speter 154289177Speter/* Constructor data structure. 155289177Speter */ 156289177Speterstruct svn_fs_x__reps_builder_t 157289177Speter{ 158289177Speter /* file system to read base representations from */ 159289177Speter svn_fs_t *fs; 160289177Speter 161289177Speter /* text corpus */ 162289177Speter svn_stringbuf_t *text; 163289177Speter 164289177Speter /* text block hash */ 165289177Speter hash_t hash; 166289177Speter 167289177Speter /* array of base_t objects describing all bases defined so far */ 168289177Speter apr_array_header_t *bases; 169289177Speter 170289177Speter /* array of rep_t objects describing all fulltexts (including bases) 171289177Speter * added so far */ 172289177Speter apr_array_header_t *reps; 173289177Speter 174289177Speter /* array of instruction_t objects describing all instructions */ 175289177Speter apr_array_header_t *instructions; 176289177Speter 177289177Speter /* number of bytes in the text corpus that belongs to bases */ 178289177Speter apr_size_t base_text_len; 179289177Speter}; 180289177Speter 181289177Speter/* R/o container. 182289177Speter */ 183289177Speterstruct svn_fs_x__reps_t 184289177Speter{ 185289177Speter /* text corpus */ 186289177Speter const char *text; 187289177Speter 188289177Speter /* length of the text corpus in bytes */ 189289177Speter apr_size_t text_len; 190289177Speter 191289177Speter /* bases used */ 192289177Speter const base_t *bases; 193289177Speter 194289177Speter /* number of bases used */ 195289177Speter apr_size_t base_count; 196289177Speter 197289177Speter /* fulltext i can be reconstructed by executing instructions 198289177Speter * first_instructions[i] .. first_instructions[i+1]-1 199289177Speter * (this array has one extra element at the end) 200289177Speter */ 201289177Speter const apr_uint32_t *first_instructions; 202289177Speter 203289177Speter /* number of fulltexts (no bases) */ 204289177Speter apr_size_t rep_count; 205289177Speter 206289177Speter /* instructions */ 207289177Speter const instruction_t *instructions; 208289177Speter 209289177Speter /* total number of instructions */ 210289177Speter apr_size_t instruction_count; 211289177Speter 212289177Speter /* offsets > 0 but smaller that this are considered base references */ 213289177Speter apr_size_t base_text_len; 214289177Speter}; 215289177Speter 216289177Speter/* describe a section in the extractor's result string that is not filled 217289177Speter * yet (but already exists). 218289177Speter */ 219289177Spetertypedef struct missing_t 220289177Speter{ 221289177Speter /* start offset within the result string */ 222289177Speter apr_uint32_t start; 223289177Speter 224289177Speter /* number of bytes to write */ 225289177Speter apr_uint32_t count; 226289177Speter 227289177Speter /* index into extractor->bases selecting the base representation to 228289177Speter * copy from */ 229289177Speter apr_uint32_t base; 230289177Speter 231289177Speter /* copy source offset within that base representation */ 232289177Speter apr_uint32_t offset; 233289177Speter} missing_t; 234289177Speter 235289177Speter/* Fulltext extractor data structure. 236289177Speter */ 237289177Speterstruct svn_fs_x__rep_extractor_t 238289177Speter{ 239289177Speter /* filesystem to read the bases from */ 240289177Speter svn_fs_t *fs; 241289177Speter 242289177Speter /* fulltext being constructed */ 243289177Speter svn_stringbuf_t *result; 244289177Speter 245289177Speter /* bases (base_t) yet to process (not used ATM) */ 246289177Speter apr_array_header_t *bases; 247289177Speter 248289177Speter /* missing sections (missing_t) in result->data that need to be filled, 249289177Speter * yet */ 250289177Speter apr_array_header_t *missing; 251289177Speter 252289177Speter /* pool to use for allocating the above arrays */ 253289177Speter apr_pool_t *pool; 254289177Speter}; 255289177Speter 256289177Speter/* Given the ADLER32 checksum for a certain range of MATCH_BLOCKSIZE 257289177Speter * bytes, return the checksum for the range excluding the first byte 258289177Speter * C_OUT and appending C_IN. 259289177Speter */ 260289177Speterstatic hash_key_t 261289177Speterhash_key_replace(hash_key_t adler32, const char c_out, const char c_in) 262289177Speter{ 263289177Speter adler32 -= (MATCH_BLOCKSIZE * 0x10000u * ((unsigned char) c_out)); 264289177Speter 265289177Speter adler32 -= (unsigned char)c_out; 266289177Speter adler32 += (unsigned char)c_in; 267289177Speter 268289177Speter return adler32 + adler32 * 0x10000; 269289177Speter} 270289177Speter 271289177Speter/* Calculate an pseudo-adler32 checksum for MATCH_BLOCKSIZE bytes starting 272289177Speter at DATA. Return the checksum value. */ 273289177Speterstatic hash_key_t 274289177Speterhash_key(const char *data) 275289177Speter{ 276289177Speter const unsigned char *input = (const unsigned char *)data; 277289177Speter const unsigned char *last = input + MATCH_BLOCKSIZE; 278289177Speter 279289177Speter hash_key_t s1 = 0; 280289177Speter hash_key_t s2 = 0; 281289177Speter 282289177Speter for (; input < last; input += 8) 283289177Speter { 284289177Speter s1 += input[0]; s2 += s1; 285289177Speter s1 += input[1]; s2 += s1; 286289177Speter s1 += input[2]; s2 += s1; 287289177Speter s1 += input[3]; s2 += s1; 288289177Speter s1 += input[4]; s2 += s1; 289289177Speter s1 += input[5]; s2 += s1; 290289177Speter s1 += input[6]; s2 += s1; 291289177Speter s1 += input[7]; s2 += s1; 292289177Speter } 293289177Speter 294289177Speter return s2 * 0x10000 + s1; 295289177Speter} 296289177Speter 297289177Speter/* Map the ADLER32 key to a bucket index in HASH and return that index. 298289177Speter */ 299289177Speterstatic apr_size_t 300289177Speterhash_to_index(hash_t *hash, hash_key_t adler32) 301289177Speter{ 302289177Speter return (adler32 * 0xd1f3da69) >> hash->shift; 303289177Speter} 304289177Speter 305289177Speter/* Allocate and initialized SIZE buckets in RESULT_POOL. 306289177Speter * Assign them to HASH. 307289177Speter */ 308289177Speterstatic void 309289177Speterallocate_hash_members(hash_t *hash, 310289177Speter apr_size_t size, 311289177Speter apr_pool_t *result_pool) 312289177Speter{ 313289177Speter apr_size_t i; 314289177Speter 315289177Speter hash->pool = result_pool; 316289177Speter hash->size = size; 317289177Speter 318289177Speter hash->prefixes = apr_pcalloc(result_pool, size); 319289177Speter hash->last_matches = apr_pcalloc(result_pool, 320289177Speter sizeof(*hash->last_matches) * size); 321289177Speter hash->offsets = apr_palloc(result_pool, sizeof(*hash->offsets) * size); 322289177Speter 323289177Speter for (i = 0; i < size; ++i) 324289177Speter hash->offsets[i] = NO_OFFSET; 325289177Speter} 326289177Speter 327289177Speter/* Initialize the HASH data structure with 2**TWOPOWER buckets allocated 328289177Speter * in RESULT_POOL. 329289177Speter */ 330289177Speterstatic void 331289177Speterinit_hash(hash_t *hash, 332289177Speter apr_size_t twoPower, 333289177Speter apr_pool_t *result_pool) 334289177Speter{ 335289177Speter hash->used = 0; 336289177Speter hash->shift = sizeof(hash_key_t) * 8 - twoPower; 337289177Speter 338289177Speter allocate_hash_members(hash, 1 << twoPower, result_pool); 339289177Speter} 340289177Speter 341289177Speter/* Make HASH have at least MIN_SIZE buckets but at least double the number 342289177Speter * of buckets in HASH by rehashing it based TEXT. 343289177Speter */ 344289177Speterstatic void 345289177Spetergrow_hash(hash_t *hash, 346289177Speter svn_stringbuf_t *text, 347289177Speter apr_size_t min_size) 348289177Speter{ 349289177Speter hash_t copy; 350289177Speter apr_size_t i; 351289177Speter 352289177Speter /* determine the new hash size */ 353289177Speter apr_size_t new_size = hash->size * 2; 354289177Speter apr_size_t new_shift = hash->shift - 1; 355289177Speter while (new_size < min_size) 356289177Speter { 357289177Speter new_size *= 2; 358289177Speter --new_shift; 359289177Speter } 360289177Speter 361289177Speter /* allocate new hash */ 362289177Speter allocate_hash_members(©, new_size, hash->pool); 363289177Speter copy.used = 0; 364289177Speter copy.shift = new_shift; 365289177Speter 366289177Speter /* copy / translate data */ 367289177Speter for (i = 0; i < hash->size; ++i) 368289177Speter { 369289177Speter apr_uint32_t offset = hash->offsets[i]; 370289177Speter if (offset != NO_OFFSET) 371289177Speter { 372289177Speter hash_key_t key = hash_key(text->data + offset); 373289177Speter size_t idx = hash_to_index(©, key); 374289177Speter 375289177Speter if (copy.offsets[idx] == NO_OFFSET) 376289177Speter copy.used++; 377289177Speter 378289177Speter copy.prefixes[idx] = hash->prefixes[i]; 379289177Speter copy.offsets[idx] = offset; 380289177Speter copy.last_matches[idx] = hash->last_matches[i]; 381289177Speter } 382289177Speter } 383289177Speter 384289177Speter *hash = copy; 385289177Speter} 386289177Speter 387289177Spetersvn_fs_x__reps_builder_t * 388289177Spetersvn_fs_x__reps_builder_create(svn_fs_t *fs, 389289177Speter apr_pool_t *result_pool) 390289177Speter{ 391289177Speter svn_fs_x__reps_builder_t *result = apr_pcalloc(result_pool, 392289177Speter sizeof(*result)); 393289177Speter 394289177Speter result->fs = fs; 395289177Speter result->text = svn_stringbuf_create_empty(result_pool); 396289177Speter init_hash(&result->hash, 4, result_pool); 397289177Speter 398289177Speter result->bases = apr_array_make(result_pool, 0, sizeof(base_t)); 399289177Speter result->reps = apr_array_make(result_pool, 0, sizeof(rep_t)); 400289177Speter result->instructions = apr_array_make(result_pool, 0, 401289177Speter sizeof(instruction_t)); 402289177Speter 403289177Speter return result; 404289177Speter} 405289177Speter 406289177Spetersvn_error_t * 407289177Spetersvn_fs_x__reps_add_base(svn_fs_x__reps_builder_t *builder, 408289177Speter svn_fs_x__representation_t *rep, 409289177Speter int priority, 410289177Speter apr_pool_t *scratch_pool) 411289177Speter{ 412289177Speter base_t base; 413289177Speter apr_size_t text_start_offset = builder->text->len; 414289177Speter 415289177Speter svn_stream_t *stream; 416289177Speter svn_string_t *contents; 417289177Speter apr_size_t idx; 418289177Speter SVN_ERR(svn_fs_x__get_contents(&stream, builder->fs, rep, FALSE, 419289177Speter scratch_pool)); 420362181Sdim SVN_ERR(svn_string_from_stream2(&contents, stream, SVN__STREAM_CHUNK_SIZE, 421362181Sdim scratch_pool)); 422289177Speter SVN_ERR(svn_fs_x__reps_add(&idx, builder, contents)); 423289177Speter 424289177Speter base.revision = svn_fs_x__get_revnum(rep->id.change_set); 425289177Speter base.item_index = rep->id.number; 426289177Speter base.priority = priority; 427289177Speter base.rep = (apr_uint32_t)idx; 428289177Speter 429289177Speter APR_ARRAY_PUSH(builder->bases, base_t) = base; 430289177Speter builder->base_text_len += builder->text->len - text_start_offset; 431289177Speter 432289177Speter return SVN_NO_ERROR; 433289177Speter} 434289177Speter 435289177Speter/* Add LEN bytes from DATA to BUILDER's text corpus. Also, add a copy 436289177Speter * operation for that text fragment. 437289177Speter */ 438289177Speterstatic void 439289177Speteradd_new_text(svn_fs_x__reps_builder_t *builder, 440289177Speter const char *data, 441289177Speter apr_size_t len) 442289177Speter{ 443289177Speter instruction_t instruction; 444289177Speter apr_size_t offset; 445289177Speter apr_size_t buckets_required; 446289177Speter 447289177Speter if (len == 0) 448289177Speter return; 449289177Speter 450289177Speter /* new instruction */ 451289177Speter instruction.offset = (apr_int32_t)builder->text->len; 452289177Speter instruction.count = (apr_uint32_t)len; 453289177Speter APR_ARRAY_PUSH(builder->instructions, instruction_t) = instruction; 454289177Speter 455289177Speter /* add to text corpus */ 456289177Speter svn_stringbuf_appendbytes(builder->text, data, len); 457289177Speter 458289177Speter /* expand the hash upfront to minimize the chances of collisions */ 459289177Speter buckets_required = builder->hash.used + len / MATCH_BLOCKSIZE; 460289177Speter if (buckets_required * 3 >= builder->hash.size * 2) 461289177Speter grow_hash(&builder->hash, builder->text, 2 * buckets_required); 462289177Speter 463289177Speter /* add hash entries for the new sequence */ 464289177Speter for (offset = instruction.offset; 465289177Speter offset + MATCH_BLOCKSIZE <= builder->text->len; 466289177Speter offset += MATCH_BLOCKSIZE) 467289177Speter { 468289177Speter hash_key_t key = hash_key(builder->text->data + offset); 469289177Speter size_t idx = hash_to_index(&builder->hash, key); 470289177Speter 471289177Speter /* Don't replace hash entries that stem from the current text. 472289177Speter * This makes early matches more likely. */ 473289177Speter if (builder->hash.offsets[idx] == NO_OFFSET) 474289177Speter ++builder->hash.used; 475289177Speter else if (builder->hash.offsets[idx] >= instruction.offset) 476289177Speter continue; 477289177Speter 478289177Speter builder->hash.offsets[idx] = (apr_uint32_t)offset; 479289177Speter builder->hash.prefixes[idx] = builder->text->data[offset]; 480289177Speter } 481289177Speter} 482289177Speter 483289177Spetersvn_error_t * 484289177Spetersvn_fs_x__reps_add(apr_size_t *rep_idx, 485289177Speter svn_fs_x__reps_builder_t *builder, 486289177Speter const svn_string_t *contents) 487289177Speter{ 488289177Speter rep_t rep; 489289177Speter const char *current = contents->data; 490289177Speter const char *processed = current; 491289177Speter const char *end = current + contents->len; 492289177Speter const char *last_to_test = end - MATCH_BLOCKSIZE - 1; 493289177Speter 494289177Speter if (builder->text->len + contents->len > MAX_TEXT_BODY) 495289177Speter return svn_error_create(SVN_ERR_FS_CONTAINER_SIZE, NULL, 496289177Speter _("Text body exceeds star delta container capacity")); 497289177Speter 498289177Speter if ( builder->instructions->nelts + 2 * contents->len / MATCH_BLOCKSIZE 499289177Speter > MAX_INSTRUCTIONS) 500289177Speter return svn_error_create(SVN_ERR_FS_CONTAINER_SIZE, NULL, 501289177Speter _("Instruction count exceeds star delta container capacity")); 502289177Speter 503289177Speter rep.first_instruction = (apr_uint32_t)builder->instructions->nelts; 504289177Speter while (current < last_to_test) 505289177Speter { 506289177Speter hash_key_t key = hash_key(current); 507289177Speter size_t offset; 508289177Speter size_t idx; 509289177Speter 510289177Speter /* search for the next matching sequence */ 511289177Speter 512289177Speter for (; current < last_to_test; ++current) 513289177Speter { 514289177Speter idx = hash_to_index(&builder->hash, key); 515289177Speter if (builder->hash.prefixes[idx] == current[0]) 516289177Speter { 517289177Speter offset = builder->hash.offsets[idx]; 518289177Speter if ( (offset != NO_OFFSET) 519289177Speter && (memcmp(&builder->text->data[offset], current, 520289177Speter MATCH_BLOCKSIZE) == 0)) 521289177Speter break; 522289177Speter } 523289177Speter key = hash_key_replace(key, current[0], current[MATCH_BLOCKSIZE]); 524289177Speter } 525289177Speter 526289177Speter /* found it? */ 527289177Speter 528289177Speter if (current < last_to_test) 529289177Speter { 530289177Speter instruction_t instruction; 531289177Speter 532289177Speter /* extend the match */ 533289177Speter 534289177Speter size_t prefix_match 535289177Speter = svn_cstring__reverse_match_length(current, 536289177Speter builder->text->data + offset, 537289177Speter MIN(offset, current - processed)); 538289177Speter size_t postfix_match 539289177Speter = svn_cstring__match_length(current + MATCH_BLOCKSIZE, 540289177Speter builder->text->data + offset + MATCH_BLOCKSIZE, 541289177Speter MIN(builder->text->len - offset - MATCH_BLOCKSIZE, 542289177Speter end - current - MATCH_BLOCKSIZE)); 543289177Speter 544289177Speter /* non-matched section */ 545289177Speter 546289177Speter size_t new_copy = (current - processed) - prefix_match; 547289177Speter if (new_copy) 548289177Speter add_new_text(builder, processed, new_copy); 549289177Speter 550289177Speter /* add instruction for matching section */ 551289177Speter 552289177Speter instruction.offset = (apr_int32_t)(offset - prefix_match); 553289177Speter instruction.count = (apr_uint32_t)(prefix_match + postfix_match + 554289177Speter MATCH_BLOCKSIZE); 555289177Speter APR_ARRAY_PUSH(builder->instructions, instruction_t) = instruction; 556289177Speter 557289177Speter processed = current + MATCH_BLOCKSIZE + postfix_match; 558289177Speter current = processed; 559289177Speter } 560289177Speter } 561289177Speter 562289177Speter add_new_text(builder, processed, end - processed); 563289177Speter rep.instruction_count = (apr_uint32_t)builder->instructions->nelts 564289177Speter - rep.first_instruction; 565289177Speter APR_ARRAY_PUSH(builder->reps, rep_t) = rep; 566289177Speter 567289177Speter *rep_idx = (apr_size_t)(builder->reps->nelts - 1); 568289177Speter return SVN_NO_ERROR; 569289177Speter} 570289177Speter 571289177Speterapr_size_t 572289177Spetersvn_fs_x__reps_estimate_size(const svn_fs_x__reps_builder_t *builder) 573289177Speter{ 574289177Speter /* approx: size of the text exclusive to us @ 50% compression rate 575289177Speter * + 2 bytes per instruction 576289177Speter * + 2 bytes per representation 577289177Speter * + 8 bytes per base representation 578289177Speter * + 1:8 inefficiency in using the base representations 579289177Speter * + 100 bytes static overhead 580289177Speter */ 581289177Speter return (builder->text->len - builder->base_text_len) / 2 582289177Speter + builder->instructions->nelts * 2 583289177Speter + builder->reps->nelts * 2 584289177Speter + builder->bases->nelts * 8 585289177Speter + builder->base_text_len / 8 586289177Speter + 100; 587289177Speter} 588289177Speter 589289177Speter/* Execute COUNT instructions starting at INSTRUCTION_IDX in CONTAINER 590289177Speter * and fill the parts of EXTRACTOR->RESULT that we can from this container. 591289177Speter * Record the remainder in EXTRACTOR->MISSING. 592289177Speter * 593289177Speter * This function will recurse for instructions that reference other 594289177Speter * instruction sequences. COUNT refers to the top-level instructions only. 595289177Speter */ 596289177Speterstatic void 597289177Speterget_text(svn_fs_x__rep_extractor_t *extractor, 598289177Speter const svn_fs_x__reps_t *container, 599289177Speter apr_size_t instruction_idx, 600289177Speter apr_size_t count) 601289177Speter{ 602289177Speter const instruction_t *instruction; 603289177Speter const char *offset_0 = container->text - container->base_text_len; 604289177Speter 605289177Speter for (instruction = container->instructions + instruction_idx; 606289177Speter instruction < container->instructions + instruction_idx + count; 607289177Speter instruction++) 608289177Speter if (instruction->offset < 0) 609289177Speter { 610289177Speter /* instruction sub-sequence */ 611289177Speter get_text(extractor, container, -instruction->offset, 612289177Speter instruction->count); 613289177Speter } 614289177Speter else if (instruction->offset >= container->base_text_len) 615289177Speter { 616289177Speter /* direct copy instruction */ 617289177Speter svn_stringbuf_appendbytes(extractor->result, 618289177Speter offset_0 + instruction->offset, 619289177Speter instruction->count); 620289177Speter } 621289177Speter else 622289177Speter { 623289177Speter /* a section that we need to fill from some external base rep. */ 624289177Speter missing_t missing; 625289177Speter missing.base = 0; 626289177Speter missing.start = (apr_uint32_t)extractor->result->len; 627289177Speter missing.count = instruction->count; 628289177Speter missing.offset = instruction->offset; 629289177Speter svn_stringbuf_appendfill(extractor->result, 0, instruction->count); 630289177Speter 631289177Speter if (extractor->missing == NULL) 632289177Speter extractor->missing = apr_array_make(extractor->pool, 1, 633289177Speter sizeof(missing)); 634289177Speter 635289177Speter APR_ARRAY_PUSH(extractor->missing, missing_t) = missing; 636289177Speter } 637289177Speter} 638289177Speter 639289177Spetersvn_error_t * 640289177Spetersvn_fs_x__reps_get(svn_fs_x__rep_extractor_t **extractor, 641289177Speter svn_fs_t *fs, 642289177Speter const svn_fs_x__reps_t *container, 643289177Speter apr_size_t idx, 644362181Sdim apr_pool_t *result_pool) 645289177Speter{ 646289177Speter apr_uint32_t first = container->first_instructions[idx]; 647289177Speter apr_uint32_t last = container->first_instructions[idx + 1]; 648289177Speter 649289177Speter /* create the extractor object */ 650362181Sdim svn_fs_x__rep_extractor_t *result = apr_pcalloc(result_pool, 651362181Sdim sizeof(*result)); 652289177Speter result->fs = fs; 653362181Sdim result->result = svn_stringbuf_create_empty(result_pool); 654362181Sdim result->pool = result_pool; 655289177Speter 656289177Speter /* fill all the bits of the result that we can, i.e. all but bits coming 657289177Speter * from base representations */ 658289177Speter get_text(result, container, first, last - first); 659289177Speter *extractor = result; 660289177Speter return SVN_NO_ERROR; 661289177Speter} 662289177Speter 663289177Spetersvn_error_t * 664289177Spetersvn_fs_x__extractor_drive(svn_stringbuf_t **contents, 665289177Speter svn_fs_x__rep_extractor_t *extractor, 666289177Speter apr_size_t start_offset, 667289177Speter apr_size_t size, 668289177Speter apr_pool_t *result_pool, 669289177Speter apr_pool_t *scratch_pool) 670289177Speter{ 671289177Speter /* we don't support base reps right now */ 672289177Speter SVN_ERR_ASSERT(extractor->missing == NULL); 673289177Speter 674289177Speter if (size == 0) 675289177Speter { 676289177Speter *contents = svn_stringbuf_dup(extractor->result, result_pool); 677289177Speter } 678289177Speter else 679289177Speter { 680289177Speter /* clip the selected range */ 681289177Speter if (start_offset > extractor->result->len) 682289177Speter start_offset = extractor->result->len; 683289177Speter 684289177Speter if (size > extractor->result->len - start_offset) 685289177Speter size = extractor->result->len - start_offset; 686289177Speter 687289177Speter *contents = svn_stringbuf_ncreate(extractor->result->data + start_offset, 688289177Speter size, result_pool); 689289177Speter } 690289177Speter 691289177Speter return SVN_NO_ERROR; 692289177Speter} 693289177Speter 694289177Spetersvn_error_t * 695289177Spetersvn_fs_x__write_reps_container(svn_stream_t *stream, 696289177Speter const svn_fs_x__reps_builder_t *builder, 697289177Speter apr_pool_t *scratch_pool) 698289177Speter{ 699289177Speter int i; 700289177Speter svn_packed__data_root_t *root = svn_packed__data_create_root(scratch_pool); 701289177Speter 702289177Speter /* one top-level stream for each array */ 703289177Speter svn_packed__int_stream_t *bases_stream 704289177Speter = svn_packed__create_int_stream(root, FALSE, FALSE); 705289177Speter svn_packed__int_stream_t *reps_stream 706289177Speter = svn_packed__create_int_stream(root, TRUE, FALSE); 707289177Speter svn_packed__int_stream_t *instructions_stream 708289177Speter = svn_packed__create_int_stream(root, FALSE, FALSE); 709289177Speter 710289177Speter /* for misc stuff */ 711289177Speter svn_packed__int_stream_t *misc_stream 712289177Speter = svn_packed__create_int_stream(root, FALSE, FALSE); 713289177Speter 714289177Speter /* TEXT will be just a single string */ 715289177Speter svn_packed__byte_stream_t *text_stream 716289177Speter = svn_packed__create_bytes_stream(root); 717289177Speter 718289177Speter /* structure the struct streams such we can extract much of the redundancy 719289177Speter */ 720289177Speter svn_packed__create_int_substream(bases_stream, TRUE, TRUE); 721289177Speter svn_packed__create_int_substream(bases_stream, TRUE, FALSE); 722289177Speter svn_packed__create_int_substream(bases_stream, TRUE, FALSE); 723289177Speter svn_packed__create_int_substream(bases_stream, TRUE, FALSE); 724289177Speter 725289177Speter svn_packed__create_int_substream(instructions_stream, TRUE, TRUE); 726289177Speter svn_packed__create_int_substream(instructions_stream, FALSE, FALSE); 727289177Speter 728289177Speter /* text */ 729289177Speter svn_packed__add_bytes(text_stream, builder->text->data, builder->text->len); 730289177Speter 731289177Speter /* serialize bases */ 732289177Speter for (i = 0; i < builder->bases->nelts; ++i) 733289177Speter { 734289177Speter const base_t *base = &APR_ARRAY_IDX(builder->bases, i, base_t); 735289177Speter svn_packed__add_int(bases_stream, base->revision); 736289177Speter svn_packed__add_uint(bases_stream, base->item_index); 737289177Speter svn_packed__add_uint(bases_stream, base->priority); 738289177Speter svn_packed__add_uint(bases_stream, base->rep); 739289177Speter } 740289177Speter 741289177Speter /* serialize reps */ 742289177Speter for (i = 0; i < builder->reps->nelts; ++i) 743289177Speter { 744289177Speter const rep_t *rep = &APR_ARRAY_IDX(builder->reps, i, rep_t); 745289177Speter svn_packed__add_uint(reps_stream, rep->first_instruction); 746289177Speter } 747289177Speter 748289177Speter svn_packed__add_uint(reps_stream, builder->instructions->nelts); 749289177Speter 750289177Speter /* serialize instructions */ 751289177Speter for (i = 0; i < builder->instructions->nelts; ++i) 752289177Speter { 753289177Speter const instruction_t *instruction 754289177Speter = &APR_ARRAY_IDX(builder->instructions, i, instruction_t); 755289177Speter svn_packed__add_int(instructions_stream, instruction->offset); 756289177Speter svn_packed__add_uint(instructions_stream, instruction->count); 757289177Speter } 758289177Speter 759289177Speter /* other elements */ 760289177Speter svn_packed__add_uint(misc_stream, 0); 761289177Speter 762289177Speter /* write to stream */ 763289177Speter SVN_ERR(svn_packed__data_write(stream, root, scratch_pool)); 764289177Speter 765289177Speter return SVN_NO_ERROR; 766289177Speter} 767289177Speter 768289177Spetersvn_error_t * 769289177Spetersvn_fs_x__read_reps_container(svn_fs_x__reps_t **container, 770289177Speter svn_stream_t *stream, 771289177Speter apr_pool_t *result_pool, 772289177Speter apr_pool_t *scratch_pool) 773289177Speter{ 774289177Speter apr_size_t i; 775289177Speter 776289177Speter base_t *bases; 777289177Speter apr_uint32_t *first_instructions; 778289177Speter instruction_t *instructions; 779289177Speter 780289177Speter svn_fs_x__reps_t *reps = apr_pcalloc(result_pool, sizeof(*reps)); 781289177Speter 782289177Speter svn_packed__data_root_t *root; 783289177Speter svn_packed__int_stream_t *bases_stream; 784289177Speter svn_packed__int_stream_t *reps_stream; 785289177Speter svn_packed__int_stream_t *instructions_stream; 786289177Speter svn_packed__int_stream_t *misc_stream; 787289177Speter svn_packed__byte_stream_t *text_stream; 788289177Speter 789289177Speter /* read from disk */ 790289177Speter SVN_ERR(svn_packed__data_read(&root, stream, result_pool, scratch_pool)); 791289177Speter 792289177Speter bases_stream = svn_packed__first_int_stream(root); 793289177Speter reps_stream = svn_packed__next_int_stream(bases_stream); 794289177Speter instructions_stream = svn_packed__next_int_stream(reps_stream); 795289177Speter misc_stream = svn_packed__next_int_stream(instructions_stream); 796289177Speter text_stream = svn_packed__first_byte_stream(root); 797289177Speter 798289177Speter /* text */ 799289177Speter reps->text = svn_packed__get_bytes(text_stream, &reps->text_len); 800289177Speter reps->text = apr_pmemdup(result_pool, reps->text, reps->text_len); 801289177Speter 802289177Speter /* de-serialize bases */ 803289177Speter reps->base_count 804289177Speter = svn_packed__int_count(svn_packed__first_int_substream(bases_stream)); 805289177Speter bases = apr_palloc(result_pool, reps->base_count * sizeof(*bases)); 806289177Speter reps->bases = bases; 807289177Speter 808289177Speter for (i = 0; i < reps->base_count; ++i) 809289177Speter { 810289177Speter base_t *base = bases + i; 811289177Speter base->revision = (svn_revnum_t)svn_packed__get_int(bases_stream); 812289177Speter base->item_index = svn_packed__get_uint(bases_stream); 813289177Speter base->priority = (int)svn_packed__get_uint(bases_stream); 814289177Speter base->rep = (apr_uint32_t)svn_packed__get_uint(bases_stream); 815289177Speter } 816289177Speter 817289177Speter /* de-serialize instructions */ 818289177Speter reps->instruction_count 819289177Speter = svn_packed__int_count 820289177Speter (svn_packed__first_int_substream(instructions_stream)); 821289177Speter instructions 822289177Speter = apr_palloc(result_pool, 823289177Speter reps->instruction_count * sizeof(*instructions)); 824289177Speter reps->instructions = instructions; 825289177Speter 826289177Speter for (i = 0; i < reps->instruction_count; ++i) 827289177Speter { 828289177Speter instruction_t *instruction = instructions + i; 829289177Speter instruction->offset 830289177Speter = (apr_int32_t)svn_packed__get_int(instructions_stream); 831289177Speter instruction->count 832289177Speter = (apr_uint32_t)svn_packed__get_uint(instructions_stream); 833289177Speter } 834289177Speter 835289177Speter /* de-serialize reps */ 836289177Speter reps->rep_count = svn_packed__int_count(reps_stream); 837289177Speter first_instructions 838289177Speter = apr_palloc(result_pool, 839289177Speter (reps->rep_count + 1) * sizeof(*first_instructions)); 840289177Speter reps->first_instructions = first_instructions; 841289177Speter 842289177Speter for (i = 0; i < reps->rep_count; ++i) 843289177Speter first_instructions[i] 844289177Speter = (apr_uint32_t)svn_packed__get_uint(reps_stream); 845289177Speter first_instructions[reps->rep_count] = (apr_uint32_t)reps->instruction_count; 846289177Speter 847289177Speter /* other elements */ 848289177Speter reps->base_text_len = (apr_size_t)svn_packed__get_uint(misc_stream); 849289177Speter 850289177Speter /* return result */ 851289177Speter *container = reps; 852289177Speter 853289177Speter return SVN_NO_ERROR; 854289177Speter} 855289177Speter 856289177Spetersvn_error_t * 857289177Spetersvn_fs_x__serialize_reps_container(void **data, 858289177Speter apr_size_t *data_len, 859289177Speter void *in, 860289177Speter apr_pool_t *pool) 861289177Speter{ 862289177Speter svn_fs_x__reps_t *reps = in; 863289177Speter svn_stringbuf_t *serialized; 864289177Speter 865289177Speter /* make a guesstimate on the size of the serialized data. Erring on the 866289177Speter * low side will cause the serializer to re-alloc its buffer. */ 867289177Speter apr_size_t size 868289177Speter = reps->text_len 869289177Speter + reps->base_count * sizeof(*reps->bases) 870289177Speter + reps->rep_count * sizeof(*reps->first_instructions) 871289177Speter + reps->instruction_count * sizeof(*reps->instructions) 872289177Speter + 100; 873289177Speter 874289177Speter /* serialize array header and all its elements */ 875289177Speter svn_temp_serializer__context_t *context 876289177Speter = svn_temp_serializer__init(reps, sizeof(*reps), size, pool); 877289177Speter 878289177Speter /* serialize sub-structures */ 879289177Speter svn_temp_serializer__add_leaf(context, (const void **)&reps->text, 880289177Speter reps->text_len); 881289177Speter svn_temp_serializer__add_leaf(context, (const void **)&reps->bases, 882289177Speter reps->base_count * sizeof(*reps->bases)); 883289177Speter svn_temp_serializer__add_leaf(context, 884289177Speter (const void **)&reps->first_instructions, 885289177Speter reps->rep_count * 886289177Speter sizeof(*reps->first_instructions)); 887289177Speter svn_temp_serializer__add_leaf(context, (const void **)&reps->instructions, 888289177Speter reps->instruction_count * 889289177Speter sizeof(*reps->instructions)); 890289177Speter 891289177Speter /* return the serialized result */ 892289177Speter serialized = svn_temp_serializer__get(context); 893289177Speter 894289177Speter *data = serialized->data; 895289177Speter *data_len = serialized->len; 896289177Speter 897289177Speter return SVN_NO_ERROR; 898289177Speter} 899289177Speter 900289177Spetersvn_error_t * 901289177Spetersvn_fs_x__deserialize_reps_container(void **out, 902289177Speter void *data, 903289177Speter apr_size_t data_len, 904362181Sdim apr_pool_t *result_pool) 905289177Speter{ 906289177Speter svn_fs_x__reps_t *reps = (svn_fs_x__reps_t *)data; 907289177Speter 908289177Speter /* de-serialize sub-structures */ 909289177Speter svn_temp_deserializer__resolve(reps, (void **)&reps->text); 910289177Speter svn_temp_deserializer__resolve(reps, (void **)&reps->bases); 911289177Speter svn_temp_deserializer__resolve(reps, (void **)&reps->first_instructions); 912289177Speter svn_temp_deserializer__resolve(reps, (void **)&reps->instructions); 913289177Speter 914289177Speter /* done */ 915289177Speter *out = reps; 916289177Speter 917289177Speter return SVN_NO_ERROR; 918289177Speter} 919289177Speter 920289177Spetersvn_error_t * 921289177Spetersvn_fs_x__reps_get_func(void **out, 922289177Speter const void *data, 923289177Speter apr_size_t data_len, 924289177Speter void *baton, 925289177Speter apr_pool_t *pool) 926289177Speter{ 927289177Speter svn_fs_x__reps_baton_t *reps_baton = baton; 928289177Speter 929289177Speter /* get a usable reps structure */ 930289177Speter const svn_fs_x__reps_t *cached = data; 931289177Speter svn_fs_x__reps_t *reps = apr_pmemdup(pool, cached, sizeof(*reps)); 932289177Speter 933289177Speter reps->text 934289177Speter = svn_temp_deserializer__ptr(cached, (const void **)&cached->text); 935289177Speter reps->bases 936289177Speter = svn_temp_deserializer__ptr(cached, (const void **)&cached->bases); 937289177Speter reps->first_instructions 938289177Speter = svn_temp_deserializer__ptr(cached, 939289177Speter (const void **)&cached->first_instructions); 940289177Speter reps->instructions 941289177Speter = svn_temp_deserializer__ptr(cached, 942289177Speter (const void **)&cached->instructions); 943289177Speter 944289177Speter /* return an extractor for the selected item */ 945289177Speter SVN_ERR(svn_fs_x__reps_get((svn_fs_x__rep_extractor_t **)out, 946289177Speter reps_baton->fs, reps, reps_baton->idx, pool)); 947289177Speter 948289177Speter return SVN_NO_ERROR; 949289177Speter} 950