/* text_delta.c revision 251881 */
1251881Speter/* 2251881Speter * text-delta.c -- Internal text delta representation 3251881Speter * 4251881Speter * ==================================================================== 5251881Speter * Licensed to the Apache Software Foundation (ASF) under one 6251881Speter * or more contributor license agreements. See the NOTICE file 7251881Speter * distributed with this work for additional information 8251881Speter * regarding copyright ownership. The ASF licenses this file 9251881Speter * to you under the Apache License, Version 2.0 (the 10251881Speter * "License"); you may not use this file except in compliance 11251881Speter * with the License. You may obtain a copy of the License at 12251881Speter * 13251881Speter * http://www.apache.org/licenses/LICENSE-2.0 14251881Speter * 15251881Speter * Unless required by applicable law or agreed to in writing, 16251881Speter * software distributed under the License is distributed on an 17251881Speter * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 18251881Speter * KIND, either express or implied. See the License for the 19251881Speter * specific language governing permissions and limitations 20251881Speter * under the License. 21251881Speter * ==================================================================== 22251881Speter */ 23251881Speter 24251881Speter 25251881Speter#include <assert.h> 26251881Speter#include <string.h> 27251881Speter 28251881Speter#include <apr_general.h> /* for APR_INLINE */ 29251881Speter#include <apr_md5.h> /* for, um...MD5 stuff */ 30251881Speter 31251881Speter#include "svn_delta.h" 32251881Speter#include "svn_io.h" 33251881Speter#include "svn_pools.h" 34251881Speter#include "svn_checksum.h" 35251881Speter 36251881Speter#include "delta.h" 37251881Speter 38251881Speter 39251881Speter/* Text delta stream descriptor. */ 40251881Speter 41251881Speterstruct svn_txdelta_stream_t { 42251881Speter /* Copied from parameters to svn_txdelta_stream_create. 
*/ 43251881Speter void *baton; 44251881Speter svn_txdelta_next_window_fn_t next_window; 45251881Speter svn_txdelta_md5_digest_fn_t md5_digest; 46251881Speter}; 47251881Speter 48251881Speter/* Delta stream baton. */ 49251881Speterstruct txdelta_baton { 50251881Speter /* These are copied from parameters passed to svn_txdelta. */ 51251881Speter svn_stream_t *source; 52251881Speter svn_stream_t *target; 53251881Speter 54251881Speter /* Private data */ 55251881Speter svn_boolean_t more_source; /* FALSE if source stream hit EOF. */ 56251881Speter svn_boolean_t more; /* TRUE if there are more data in the pool. */ 57251881Speter svn_filesize_t pos; /* Offset of next read in source file. */ 58251881Speter char *buf; /* Buffer for input data. */ 59251881Speter 60251881Speter svn_checksum_ctx_t *context; /* If not NULL, the context for computing 61251881Speter the checksum. */ 62251881Speter svn_checksum_t *checksum; /* If non-NULL, the checksum of TARGET. */ 63251881Speter 64251881Speter apr_pool_t *result_pool; /* For results (e.g. checksum) */ 65251881Speter}; 66251881Speter 67251881Speter 68251881Speter/* Target-push stream descriptor. */ 69251881Speter 70251881Speterstruct tpush_baton { 71251881Speter /* These are copied from parameters passed to svn_txdelta_target_push. */ 72251881Speter svn_stream_t *source; 73251881Speter svn_txdelta_window_handler_t wh; 74251881Speter void *whb; 75251881Speter apr_pool_t *pool; 76251881Speter 77251881Speter /* Private data */ 78251881Speter char *buf; 79251881Speter svn_filesize_t source_offset; 80251881Speter apr_size_t source_len; 81251881Speter svn_boolean_t source_done; 82251881Speter apr_size_t target_len; 83251881Speter}; 84251881Speter 85251881Speter 86251881Speter/* Text delta applicator. */ 87251881Speter 88251881Speterstruct apply_baton { 89251881Speter /* These are copied from parameters passed to svn_txdelta_apply. 
*/ 90251881Speter svn_stream_t *source; 91251881Speter svn_stream_t *target; 92251881Speter 93251881Speter /* Private data. Between calls, SBUF contains the data from the 94251881Speter * last window's source view, as specified by SBUF_OFFSET and 95251881Speter * SBUF_LEN. The contents of TBUF are not interesting between 96251881Speter * calls. */ 97251881Speter apr_pool_t *pool; /* Pool to allocate data from */ 98251881Speter char *sbuf; /* Source buffer */ 99251881Speter apr_size_t sbuf_size; /* Allocated source buffer space */ 100251881Speter svn_filesize_t sbuf_offset; /* Offset of SBUF data in source stream */ 101251881Speter apr_size_t sbuf_len; /* Length of SBUF data */ 102251881Speter char *tbuf; /* Target buffer */ 103251881Speter apr_size_t tbuf_size; /* Allocated target buffer space */ 104251881Speter 105251881Speter apr_md5_ctx_t md5_context; /* Leads to result_digest below. */ 106251881Speter unsigned char *result_digest; /* MD5 digest of resultant fulltext; 107251881Speter must point to at least APR_MD5_DIGESTSIZE 108251881Speter bytes of storage. */ 109251881Speter 110251881Speter const char *error_info; /* Optional extra info for error returns. 
*/ 111251881Speter}; 112251881Speter 113251881Speter 114251881Speter 115251881Spetersvn_txdelta_window_t * 116251881Spetersvn_txdelta__make_window(const svn_txdelta__ops_baton_t *build_baton, 117251881Speter apr_pool_t *pool) 118251881Speter{ 119251881Speter svn_txdelta_window_t *window; 120251881Speter svn_string_t *new_data = apr_palloc(pool, sizeof(*new_data)); 121251881Speter 122251881Speter window = apr_palloc(pool, sizeof(*window)); 123251881Speter window->sview_offset = 0; 124251881Speter window->sview_len = 0; 125251881Speter window->tview_len = 0; 126251881Speter 127251881Speter window->num_ops = build_baton->num_ops; 128251881Speter window->src_ops = build_baton->src_ops; 129251881Speter window->ops = build_baton->ops; 130251881Speter 131251881Speter /* just copy the fields over, rather than alloc/copying into a whole new 132251881Speter svn_string_t structure. */ 133251881Speter /* ### would be much nicer if window->new_data were not a ptr... */ 134251881Speter new_data->data = build_baton->new_data->data; 135251881Speter new_data->len = build_baton->new_data->len; 136251881Speter window->new_data = new_data; 137251881Speter 138251881Speter return window; 139251881Speter} 140251881Speter 141251881Speter 142251881Speter/* Compute and return a delta window using the xdelta algorithm on 143251881Speter DATA, which contains SOURCE_LEN bytes of source data and TARGET_LEN 144251881Speter bytes of target data. SOURCE_OFFSET gives the offset of the source 145251881Speter data, and is simply copied into the window's sview_offset field. */ 146251881Speterstatic svn_txdelta_window_t * 147251881Spetercompute_window(const char *data, apr_size_t source_len, apr_size_t target_len, 148251881Speter svn_filesize_t source_offset, apr_pool_t *pool) 149251881Speter{ 150251881Speter svn_txdelta__ops_baton_t build_baton = { 0 }; 151251881Speter svn_txdelta_window_t *window; 152251881Speter 153251881Speter /* Compute the delta operations. 
*/ 154251881Speter build_baton.new_data = svn_stringbuf_create_empty(pool); 155251881Speter 156251881Speter if (source_len == 0) 157251881Speter svn_txdelta__insert_op(&build_baton, svn_txdelta_new, 0, target_len, data, 158251881Speter pool); 159251881Speter else 160251881Speter svn_txdelta__xdelta(&build_baton, data, source_len, target_len, pool); 161251881Speter 162251881Speter /* Create and return the delta window. */ 163251881Speter window = svn_txdelta__make_window(&build_baton, pool); 164251881Speter window->sview_offset = source_offset; 165251881Speter window->sview_len = source_len; 166251881Speter window->tview_len = target_len; 167251881Speter return window; 168251881Speter} 169251881Speter 170251881Speter 171251881Speter 172251881Spetersvn_txdelta_window_t * 173251881Spetersvn_txdelta_window_dup(const svn_txdelta_window_t *window, 174251881Speter apr_pool_t *pool) 175251881Speter{ 176251881Speter svn_txdelta__ops_baton_t build_baton = { 0 }; 177251881Speter svn_txdelta_window_t *new_window; 178251881Speter const apr_size_t ops_size = (window->num_ops * sizeof(*build_baton.ops)); 179251881Speter 180251881Speter build_baton.num_ops = window->num_ops; 181251881Speter build_baton.src_ops = window->src_ops; 182251881Speter build_baton.ops_size = window->num_ops; 183251881Speter build_baton.ops = apr_palloc(pool, ops_size); 184251881Speter memcpy(build_baton.ops, window->ops, ops_size); 185251881Speter build_baton.new_data = 186251881Speter svn_stringbuf_create_from_string(window->new_data, pool); 187251881Speter 188251881Speter new_window = svn_txdelta__make_window(&build_baton, pool); 189251881Speter new_window->sview_offset = window->sview_offset; 190251881Speter new_window->sview_len = window->sview_len; 191251881Speter new_window->tview_len = window->tview_len; 192251881Speter return new_window; 193251881Speter} 194251881Speter 195251881Speter/* This is a private interlibrary compatibility wrapper. 
*/ 196251881Spetersvn_txdelta_window_t * 197251881Spetersvn_txdelta__copy_window(const svn_txdelta_window_t *window, 198251881Speter apr_pool_t *pool); 199251881Spetersvn_txdelta_window_t * 200251881Spetersvn_txdelta__copy_window(const svn_txdelta_window_t *window, 201251881Speter apr_pool_t *pool) 202251881Speter{ 203251881Speter return svn_txdelta_window_dup(window, pool); 204251881Speter} 205251881Speter 206251881Speter 207251881Speter/* Insert a delta op into a delta window. */ 208251881Speter 209251881Spetervoid 210251881Spetersvn_txdelta__insert_op(svn_txdelta__ops_baton_t *build_baton, 211251881Speter enum svn_delta_action opcode, 212251881Speter apr_size_t offset, 213251881Speter apr_size_t length, 214251881Speter const char *new_data, 215251881Speter apr_pool_t *pool) 216251881Speter{ 217251881Speter svn_txdelta_op_t *op; 218251881Speter 219251881Speter /* Check if this op can be merged with the previous op. The delta 220251881Speter combiner sometimes generates such ops, and this is the obvious 221251881Speter place to make the check. */ 222251881Speter if (build_baton->num_ops > 0) 223251881Speter { 224251881Speter op = &build_baton->ops[build_baton->num_ops - 1]; 225251881Speter if (op->action_code == opcode 226251881Speter && (opcode == svn_txdelta_new 227251881Speter || op->offset + op->length == offset)) 228251881Speter { 229251881Speter op->length += length; 230251881Speter if (opcode == svn_txdelta_new) 231251881Speter svn_stringbuf_appendbytes(build_baton->new_data, 232251881Speter new_data, length); 233251881Speter return; 234251881Speter } 235251881Speter } 236251881Speter 237251881Speter /* Create space for the new op. */ 238251881Speter if (build_baton->num_ops == build_baton->ops_size) 239251881Speter { 240251881Speter svn_txdelta_op_t *const old_ops = build_baton->ops; 241251881Speter int const new_ops_size = (build_baton->ops_size == 0 242251881Speter ? 
16 : 2 * build_baton->ops_size); 243251881Speter build_baton->ops = 244251881Speter apr_palloc(pool, new_ops_size * sizeof(*build_baton->ops)); 245251881Speter 246251881Speter /* Copy any existing ops into the new array */ 247251881Speter if (old_ops) 248251881Speter memcpy(build_baton->ops, old_ops, 249251881Speter build_baton->ops_size * sizeof(*build_baton->ops)); 250251881Speter build_baton->ops_size = new_ops_size; 251251881Speter } 252251881Speter 253251881Speter /* Insert the op. svn_delta_source and svn_delta_target are 254251881Speter just inserted. For svn_delta_new, the new data must be 255251881Speter copied into the window. */ 256251881Speter op = &build_baton->ops[build_baton->num_ops]; 257251881Speter switch (opcode) 258251881Speter { 259251881Speter case svn_txdelta_source: 260251881Speter ++build_baton->src_ops; 261251881Speter /*** FALLTHRU ***/ 262251881Speter case svn_txdelta_target: 263251881Speter op->action_code = opcode; 264251881Speter op->offset = offset; 265251881Speter op->length = length; 266251881Speter break; 267251881Speter case svn_txdelta_new: 268251881Speter op->action_code = opcode; 269251881Speter op->offset = build_baton->new_data->len; 270251881Speter op->length = length; 271251881Speter svn_stringbuf_appendbytes(build_baton->new_data, new_data, length); 272251881Speter break; 273251881Speter default: 274251881Speter assert(!"unknown delta op."); 275251881Speter } 276251881Speter 277251881Speter ++build_baton->num_ops; 278251881Speter} 279251881Speter 280251881Speterapr_size_t 281251881Spetersvn_txdelta__remove_copy(svn_txdelta__ops_baton_t *build_baton, 282251881Speter apr_size_t max_len) 283251881Speter{ 284251881Speter svn_txdelta_op_t *op; 285251881Speter apr_size_t len = 0; 286251881Speter 287251881Speter /* remove ops back to front */ 288251881Speter while (build_baton->num_ops > 0) 289251881Speter { 290251881Speter op = &build_baton->ops[build_baton->num_ops-1]; 291251881Speter 292251881Speter /* we can't modify 
svn_txdelta_target ops -> stop there */ 293251881Speter if (op->action_code == svn_txdelta_target) 294251881Speter break; 295251881Speter 296251881Speter /* handle the case that we cannot remove the op entirely */ 297251881Speter if (op->length + len > max_len) 298251881Speter { 299251881Speter /* truncate only insertions. Copies don't benefit 300251881Speter from being truncated. */ 301251881Speter if (op->action_code == svn_txdelta_new) 302251881Speter { 303251881Speter build_baton->new_data->len -= max_len - len; 304251881Speter op->length -= max_len - len; 305251881Speter len = max_len; 306251881Speter } 307251881Speter 308251881Speter break; 309251881Speter } 310251881Speter 311251881Speter /* drop the op entirely */ 312251881Speter if (op->action_code == svn_txdelta_new) 313251881Speter build_baton->new_data->len -= op->length; 314251881Speter 315251881Speter len += op->length; 316251881Speter --build_baton->num_ops; 317251881Speter } 318251881Speter 319251881Speter return len; 320251881Speter} 321251881Speter 322251881Speter 323251881Speter 324251881Speter/* Generic delta stream functions. 
*/ 325251881Speter 326251881Spetersvn_txdelta_stream_t * 327251881Spetersvn_txdelta_stream_create(void *baton, 328251881Speter svn_txdelta_next_window_fn_t next_window, 329251881Speter svn_txdelta_md5_digest_fn_t md5_digest, 330251881Speter apr_pool_t *pool) 331251881Speter{ 332251881Speter svn_txdelta_stream_t *stream = apr_palloc(pool, sizeof(*stream)); 333251881Speter 334251881Speter stream->baton = baton; 335251881Speter stream->next_window = next_window; 336251881Speter stream->md5_digest = md5_digest; 337251881Speter 338251881Speter return stream; 339251881Speter} 340251881Speter 341251881Spetersvn_error_t * 342251881Spetersvn_txdelta_next_window(svn_txdelta_window_t **window, 343251881Speter svn_txdelta_stream_t *stream, 344251881Speter apr_pool_t *pool) 345251881Speter{ 346251881Speter return stream->next_window(window, stream->baton, pool); 347251881Speter} 348251881Speter 349251881Speterconst unsigned char * 350251881Spetersvn_txdelta_md5_digest(svn_txdelta_stream_t *stream) 351251881Speter{ 352251881Speter return stream->md5_digest(stream->baton); 353251881Speter} 354251881Speter 355251881Speter 356251881Speter 357251881Speterstatic svn_error_t * 358251881Spetertxdelta_next_window(svn_txdelta_window_t **window, 359251881Speter void *baton, 360251881Speter apr_pool_t *pool) 361251881Speter{ 362251881Speter struct txdelta_baton *b = baton; 363251881Speter apr_size_t source_len = SVN_DELTA_WINDOW_SIZE; 364251881Speter apr_size_t target_len = SVN_DELTA_WINDOW_SIZE; 365251881Speter 366251881Speter /* Read the source stream. */ 367251881Speter if (b->more_source) 368251881Speter { 369251881Speter SVN_ERR(svn_stream_read(b->source, b->buf, &source_len)); 370251881Speter b->more_source = (source_len == SVN_DELTA_WINDOW_SIZE); 371251881Speter } 372251881Speter else 373251881Speter source_len = 0; 374251881Speter 375251881Speter /* Read the target stream. 
*/ 376251881Speter SVN_ERR(svn_stream_read(b->target, b->buf + source_len, &target_len)); 377251881Speter b->pos += source_len; 378251881Speter 379251881Speter if (target_len == 0) 380251881Speter { 381251881Speter /* No target data? We're done; return the final window. */ 382251881Speter if (b->context != NULL) 383251881Speter SVN_ERR(svn_checksum_final(&b->checksum, b->context, b->result_pool)); 384251881Speter 385251881Speter *window = NULL; 386251881Speter b->more = FALSE; 387251881Speter return SVN_NO_ERROR; 388251881Speter } 389251881Speter else if (b->context != NULL) 390251881Speter SVN_ERR(svn_checksum_update(b->context, b->buf + source_len, target_len)); 391251881Speter 392251881Speter *window = compute_window(b->buf, source_len, target_len, 393251881Speter b->pos - source_len, pool); 394251881Speter 395251881Speter /* That's it. */ 396251881Speter return SVN_NO_ERROR; 397251881Speter} 398251881Speter 399251881Speter 400251881Speterstatic const unsigned char * 401251881Spetertxdelta_md5_digest(void *baton) 402251881Speter{ 403251881Speter struct txdelta_baton *b = baton; 404251881Speter /* If there are more windows for this stream, the digest has not yet 405251881Speter been calculated. */ 406251881Speter if (b->more) 407251881Speter return NULL; 408251881Speter 409251881Speter /* If checksumming has not been activated, there will be no digest. */ 410251881Speter if (b->context == NULL) 411251881Speter return NULL; 412251881Speter 413251881Speter /* The checksum should be there. 
*/ 414251881Speter return b->checksum->digest; 415251881Speter} 416251881Speter 417251881Speter 418251881Spetersvn_error_t * 419251881Spetersvn_txdelta_run(svn_stream_t *source, 420251881Speter svn_stream_t *target, 421251881Speter svn_txdelta_window_handler_t handler, 422251881Speter void *handler_baton, 423251881Speter svn_checksum_kind_t checksum_kind, 424251881Speter svn_checksum_t **checksum, 425251881Speter svn_cancel_func_t cancel_func, 426251881Speter void *cancel_baton, 427251881Speter apr_pool_t *result_pool, 428251881Speter apr_pool_t *scratch_pool) 429251881Speter{ 430251881Speter apr_pool_t *iterpool = svn_pool_create(scratch_pool); 431251881Speter struct txdelta_baton tb = { 0 }; 432251881Speter svn_txdelta_window_t *window; 433251881Speter 434251881Speter tb.source = source; 435251881Speter tb.target = target; 436251881Speter tb.more_source = TRUE; 437251881Speter tb.more = TRUE; 438251881Speter tb.pos = 0; 439251881Speter tb.buf = apr_palloc(scratch_pool, 2 * SVN_DELTA_WINDOW_SIZE); 440251881Speter tb.result_pool = result_pool; 441251881Speter 442251881Speter if (checksum != NULL) 443251881Speter tb.context = svn_checksum_ctx_create(checksum_kind, scratch_pool); 444251881Speter 445251881Speter do 446251881Speter { 447251881Speter /* free the window (if any) */ 448251881Speter svn_pool_clear(iterpool); 449251881Speter 450251881Speter /* read in a single delta window */ 451251881Speter SVN_ERR(txdelta_next_window(&window, &tb, iterpool)); 452251881Speter 453251881Speter /* shove it at the handler */ 454251881Speter SVN_ERR((*handler)(window, handler_baton)); 455251881Speter 456251881Speter if (cancel_func) 457251881Speter SVN_ERR(cancel_func(cancel_baton)); 458251881Speter } 459251881Speter while (window != NULL); 460251881Speter 461251881Speter svn_pool_destroy(iterpool); 462251881Speter 463251881Speter if (checksum != NULL) 464251881Speter *checksum = tb.checksum; /* should be there! 
*/ 465251881Speter 466251881Speter return SVN_NO_ERROR; 467251881Speter} 468251881Speter 469251881Speter 470251881Spetervoid 471251881Spetersvn_txdelta2(svn_txdelta_stream_t **stream, 472251881Speter svn_stream_t *source, 473251881Speter svn_stream_t *target, 474251881Speter svn_boolean_t calculate_checksum, 475251881Speter apr_pool_t *pool) 476251881Speter{ 477251881Speter struct txdelta_baton *b = apr_pcalloc(pool, sizeof(*b)); 478251881Speter 479251881Speter b->source = source; 480251881Speter b->target = target; 481251881Speter b->more_source = TRUE; 482251881Speter b->more = TRUE; 483251881Speter b->buf = apr_palloc(pool, 2 * SVN_DELTA_WINDOW_SIZE); 484251881Speter b->context = calculate_checksum 485251881Speter ? svn_checksum_ctx_create(svn_checksum_md5, pool) 486251881Speter : NULL; 487251881Speter b->result_pool = pool; 488251881Speter 489251881Speter *stream = svn_txdelta_stream_create(b, txdelta_next_window, 490251881Speter txdelta_md5_digest, pool); 491251881Speter} 492251881Speter 493251881Spetervoid 494251881Spetersvn_txdelta(svn_txdelta_stream_t **stream, 495251881Speter svn_stream_t *source, 496251881Speter svn_stream_t *target, 497251881Speter apr_pool_t *pool) 498251881Speter{ 499251881Speter svn_txdelta2(stream, source, target, TRUE, pool); 500251881Speter} 501251881Speter 502251881Speter 503251881Speter 504251881Speter/* Functions for implementing a "target push" delta. */ 505251881Speter 506251881Speter/* This is the write handler for a target-push delta stream. It reads 507251881Speter * source data, buffers target data, and fires off delta windows when 508251881Speter * the target data buffer is full. 
*/ 509251881Speterstatic svn_error_t * 510251881Spetertpush_write_handler(void *baton, const char *data, apr_size_t *len) 511251881Speter{ 512251881Speter struct tpush_baton *tb = baton; 513251881Speter apr_size_t chunk_len, data_len = *len; 514251881Speter apr_pool_t *pool = svn_pool_create(tb->pool); 515251881Speter svn_txdelta_window_t *window; 516251881Speter 517251881Speter while (data_len > 0) 518251881Speter { 519251881Speter svn_pool_clear(pool); 520251881Speter 521251881Speter /* Make sure we're all full up on source data, if possible. */ 522251881Speter if (tb->source_len == 0 && !tb->source_done) 523251881Speter { 524251881Speter tb->source_len = SVN_DELTA_WINDOW_SIZE; 525251881Speter SVN_ERR(svn_stream_read(tb->source, tb->buf, &tb->source_len)); 526251881Speter if (tb->source_len < SVN_DELTA_WINDOW_SIZE) 527251881Speter tb->source_done = TRUE; 528251881Speter } 529251881Speter 530251881Speter /* Copy in the target data, up to SVN_DELTA_WINDOW_SIZE. */ 531251881Speter chunk_len = SVN_DELTA_WINDOW_SIZE - tb->target_len; 532251881Speter if (chunk_len > data_len) 533251881Speter chunk_len = data_len; 534251881Speter memcpy(tb->buf + tb->source_len + tb->target_len, data, chunk_len); 535251881Speter data += chunk_len; 536251881Speter data_len -= chunk_len; 537251881Speter tb->target_len += chunk_len; 538251881Speter 539251881Speter /* If we're full of target data, compute and fire off a window. 
*/ 540251881Speter if (tb->target_len == SVN_DELTA_WINDOW_SIZE) 541251881Speter { 542251881Speter window = compute_window(tb->buf, tb->source_len, tb->target_len, 543251881Speter tb->source_offset, pool); 544251881Speter SVN_ERR(tb->wh(window, tb->whb)); 545251881Speter tb->source_offset += tb->source_len; 546251881Speter tb->source_len = 0; 547251881Speter tb->target_len = 0; 548251881Speter } 549251881Speter } 550251881Speter 551251881Speter svn_pool_destroy(pool); 552251881Speter return SVN_NO_ERROR; 553251881Speter} 554251881Speter 555251881Speter 556251881Speter/* This is the close handler for a target-push delta stream. It sends 557251881Speter * a final window if there is any buffered target data, and then sends 558251881Speter * a NULL window signifying the end of the window stream. */ 559251881Speterstatic svn_error_t * 560251881Spetertpush_close_handler(void *baton) 561251881Speter{ 562251881Speter struct tpush_baton *tb = baton; 563251881Speter svn_txdelta_window_t *window; 564251881Speter 565251881Speter /* Send a final window if we have any residual target data. */ 566251881Speter if (tb->target_len > 0) 567251881Speter { 568251881Speter window = compute_window(tb->buf, tb->source_len, tb->target_len, 569251881Speter tb->source_offset, tb->pool); 570251881Speter SVN_ERR(tb->wh(window, tb->whb)); 571251881Speter } 572251881Speter 573251881Speter /* Send a final NULL window signifying the end. */ 574251881Speter return tb->wh(NULL, tb->whb); 575251881Speter} 576251881Speter 577251881Speter 578251881Spetersvn_stream_t * 579251881Spetersvn_txdelta_target_push(svn_txdelta_window_handler_t handler, 580251881Speter void *handler_baton, svn_stream_t *source, 581251881Speter apr_pool_t *pool) 582251881Speter{ 583251881Speter struct tpush_baton *tb; 584251881Speter svn_stream_t *stream; 585251881Speter 586251881Speter /* Initialize baton. 
*/ 587251881Speter tb = apr_palloc(pool, sizeof(*tb)); 588251881Speter tb->source = source; 589251881Speter tb->wh = handler; 590251881Speter tb->whb = handler_baton; 591251881Speter tb->pool = pool; 592251881Speter tb->buf = apr_palloc(pool, 2 * SVN_DELTA_WINDOW_SIZE); 593251881Speter tb->source_offset = 0; 594251881Speter tb->source_len = 0; 595251881Speter tb->source_done = FALSE; 596251881Speter tb->target_len = 0; 597251881Speter 598251881Speter /* Create and return writable stream. */ 599251881Speter stream = svn_stream_create(tb, pool); 600251881Speter svn_stream_set_write(stream, tpush_write_handler); 601251881Speter svn_stream_set_close(stream, tpush_close_handler); 602251881Speter return stream; 603251881Speter} 604251881Speter 605251881Speter 606251881Speter 607251881Speter/* Functions for applying deltas. */ 608251881Speter 609251881Speter/* Ensure that BUF has enough space for VIEW_LEN bytes. */ 610251881Speterstatic APR_INLINE svn_error_t * 611251881Spetersize_buffer(char **buf, apr_size_t *buf_size, 612251881Speter apr_size_t view_len, apr_pool_t *pool) 613251881Speter{ 614251881Speter if (view_len > *buf_size) 615251881Speter { 616251881Speter *buf_size *= 2; 617251881Speter if (*buf_size < view_len) 618251881Speter *buf_size = view_len; 619251881Speter SVN_ERR_ASSERT(APR_ALIGN_DEFAULT(*buf_size) >= *buf_size); 620251881Speter *buf = apr_palloc(pool, *buf_size); 621251881Speter } 622251881Speter 623251881Speter return SVN_NO_ERROR; 624251881Speter} 625251881Speter 626251881Speter/* Copy LEN bytes from SOURCE to TARGET, optimizing for the case where LEN 627251881Speter * is often very small. Return a pointer to the first byte after the copied 628251881Speter * target range, unlike standard memcpy(), as a potential further 629251881Speter * optimization for the caller. 630251881Speter * 631251881Speter * memcpy() is hard to tune for a wide range of buffer lengths. 
Therefore, 632251881Speter * it is often tuned for high throughput on large buffers and relatively 633251881Speter * low latency for mid-sized buffers (tens of bytes). However, the overhead 634251881Speter * for very small buffers (<10 bytes) is still high. Even passing the 635251881Speter * parameters, for instance, may take as long as copying 3 bytes. 636251881Speter * 637251881Speter * Because short copy sequences seem to be a common case, at least in 638251881Speter * "format 2" FSFS repositories, we copy them directly. Larger buffer sizes 639251881Speter * aren't hurt measurably by the exta 'if' clause. */ 640251881Speterstatic APR_INLINE char * 641251881Speterfast_memcpy(char *target, const char *source, apr_size_t len) 642251881Speter{ 643251881Speter if (len > 7) 644251881Speter { 645251881Speter memcpy(target, source, len); 646251881Speter target += len; 647251881Speter } 648251881Speter else 649251881Speter { 650251881Speter /* memcpy is not exactly fast for small block sizes. 651251881Speter * Since they are common, let's run optimized code for them. */ 652251881Speter const char *end = source + len; 653251881Speter for (; source != end; source++) 654251881Speter *(target++) = *source; 655251881Speter } 656251881Speter 657251881Speter return target; 658251881Speter} 659251881Speter 660251881Speter/* Copy LEN bytes from SOURCE to TARGET. Unlike memmove() or memcpy(), 661251881Speter * create repeating patterns if the source and target ranges overlap. 662251881Speter * Return a pointer to the first byte after the copied target range. */ 663251881Speterstatic APR_INLINE char * 664251881Speterpatterning_copy(char *target, const char *source, apr_size_t len) 665251881Speter{ 666251881Speter const char *end = source + len; 667251881Speter 668251881Speter /* On many machines, we can do "chunky" copies. 
*/ 669251881Speter 670251881Speter#if SVN_UNALIGNED_ACCESS_IS_OK 671251881Speter 672251881Speter if (end + sizeof(apr_uint32_t) <= target) 673251881Speter { 674251881Speter /* Source and target are at least 4 bytes apart, so we can copy in 675251881Speter * 4-byte chunks. */ 676251881Speter for (; source + sizeof(apr_uint32_t) <= end; 677251881Speter source += sizeof(apr_uint32_t), 678251881Speter target += sizeof(apr_uint32_t)) 679251881Speter *(apr_uint32_t *)(target) = *(apr_uint32_t *)(source); 680251881Speter } 681251881Speter 682251881Speter#endif 683251881Speter 684251881Speter /* fall through to byte-wise copy (either for the below-chunk-size tail 685251881Speter * or the whole copy) */ 686251881Speter for (; source != end; source++) 687251881Speter *(target++) = *source; 688251881Speter 689251881Speter return target; 690251881Speter} 691251881Speter 692251881Spetervoid 693251881Spetersvn_txdelta_apply_instructions(svn_txdelta_window_t *window, 694251881Speter const char *sbuf, char *tbuf, 695251881Speter apr_size_t *tlen) 696251881Speter{ 697251881Speter const svn_txdelta_op_t *op; 698251881Speter apr_size_t tpos = 0; 699251881Speter 700251881Speter for (op = window->ops; op < window->ops + window->num_ops; op++) 701251881Speter { 702251881Speter const apr_size_t buf_len = (op->length < *tlen - tpos 703251881Speter ? op->length : *tlen - tpos); 704251881Speter 705251881Speter /* Check some invariants common to all instructions. */ 706251881Speter assert(tpos + op->length <= window->tview_len); 707251881Speter 708251881Speter switch (op->action_code) 709251881Speter { 710251881Speter case svn_txdelta_source: 711251881Speter /* Copy from source area. */ 712251881Speter assert(sbuf); 713251881Speter assert(op->offset + op->length <= window->sview_len); 714251881Speter fast_memcpy(tbuf + tpos, sbuf + op->offset, buf_len); 715251881Speter break; 716251881Speter 717251881Speter case svn_txdelta_target: 718251881Speter /* Copy from target area. 
We can't use memcpy() or the like 719251881Speter * since we need a specific semantics for overlapping copies: 720251881Speter * they must result in repeating patterns. 721251881Speter * Note that most copies won't have overlapping source and 722251881Speter * target ranges (they are just a result of self-compressed 723251881Speter * data) but a small percentage will. */ 724251881Speter assert(op->offset < tpos); 725251881Speter patterning_copy(tbuf + tpos, tbuf + op->offset, buf_len); 726251881Speter break; 727251881Speter 728251881Speter case svn_txdelta_new: 729251881Speter /* Copy from window new area. */ 730251881Speter assert(op->offset + op->length <= window->new_data->len); 731251881Speter fast_memcpy(tbuf + tpos, 732251881Speter window->new_data->data + op->offset, 733251881Speter buf_len); 734251881Speter break; 735251881Speter 736251881Speter default: 737251881Speter assert(!"Invalid delta instruction code"); 738251881Speter } 739251881Speter 740251881Speter tpos += op->length; 741251881Speter if (tpos >= *tlen) 742251881Speter return; /* The buffer is full. */ 743251881Speter } 744251881Speter 745251881Speter /* Check that we produced the right amount of data. */ 746251881Speter assert(tpos == window->tview_len); 747251881Speter *tlen = tpos; 748251881Speter} 749251881Speter 750251881Speter/* This is a private interlibrary compatibility wrapper. */ 751251881Spetervoid 752251881Spetersvn_txdelta__apply_instructions(svn_txdelta_window_t *window, 753251881Speter const char *sbuf, char *tbuf, 754251881Speter apr_size_t *tlen); 755251881Spetervoid 756251881Spetersvn_txdelta__apply_instructions(svn_txdelta_window_t *window, 757251881Speter const char *sbuf, char *tbuf, 758251881Speter apr_size_t *tlen) 759251881Speter{ 760251881Speter svn_txdelta_apply_instructions(window, sbuf, tbuf, tlen); 761251881Speter} 762251881Speter 763251881Speter 764251881Speter/* Apply WINDOW to the streams given by APPL. 
*/ 765251881Speterstatic svn_error_t * 766251881Speterapply_window(svn_txdelta_window_t *window, void *baton) 767251881Speter{ 768251881Speter struct apply_baton *ab = (struct apply_baton *) baton; 769251881Speter apr_size_t len; 770251881Speter svn_error_t *err; 771251881Speter 772251881Speter if (window == NULL) 773251881Speter { 774251881Speter /* We're done; just clean up. */ 775251881Speter if (ab->result_digest) 776251881Speter apr_md5_final(ab->result_digest, &(ab->md5_context)); 777251881Speter 778251881Speter err = svn_stream_close(ab->target); 779251881Speter svn_pool_destroy(ab->pool); 780251881Speter 781251881Speter return err; 782251881Speter } 783251881Speter 784251881Speter /* Make sure the source view didn't slide backwards. */ 785251881Speter SVN_ERR_ASSERT(window->sview_len == 0 786251881Speter || (window->sview_offset >= ab->sbuf_offset 787251881Speter && (window->sview_offset + window->sview_len 788251881Speter >= ab->sbuf_offset + ab->sbuf_len))); 789251881Speter 790251881Speter /* Make sure there's enough room in the target buffer. */ 791251881Speter SVN_ERR(size_buffer(&ab->tbuf, &ab->tbuf_size, window->tview_len, ab->pool)); 792251881Speter 793251881Speter /* Prepare the source buffer for reading from the input stream. */ 794251881Speter if (window->sview_offset != ab->sbuf_offset 795251881Speter || window->sview_len > ab->sbuf_size) 796251881Speter { 797251881Speter char *old_sbuf = ab->sbuf; 798251881Speter 799251881Speter /* Make sure there's enough room. */ 800251881Speter SVN_ERR(size_buffer(&ab->sbuf, &ab->sbuf_size, window->sview_len, 801251881Speter ab->pool)); 802251881Speter 803251881Speter /* If the existing view overlaps with the new view, copy the 804251881Speter * overlap to the beginning of the new buffer. 
*/ 805251881Speter if ( (apr_size_t)ab->sbuf_offset + ab->sbuf_len 806251881Speter > (apr_size_t)window->sview_offset) 807251881Speter { 808251881Speter apr_size_t start = 809251881Speter (apr_size_t)(window->sview_offset - ab->sbuf_offset); 810251881Speter memmove(ab->sbuf, old_sbuf + start, ab->sbuf_len - start); 811251881Speter ab->sbuf_len -= start; 812251881Speter } 813251881Speter else 814251881Speter ab->sbuf_len = 0; 815251881Speter ab->sbuf_offset = window->sview_offset; 816251881Speter } 817251881Speter 818251881Speter /* Read the remainder of the source view into the buffer. */ 819251881Speter if (ab->sbuf_len < window->sview_len) 820251881Speter { 821251881Speter len = window->sview_len - ab->sbuf_len; 822251881Speter err = svn_stream_read(ab->source, ab->sbuf + ab->sbuf_len, &len); 823251881Speter if (err == SVN_NO_ERROR && len != window->sview_len - ab->sbuf_len) 824251881Speter err = svn_error_create(SVN_ERR_INCOMPLETE_DATA, NULL, 825251881Speter "Delta source ended unexpectedly"); 826251881Speter if (err != SVN_NO_ERROR) 827251881Speter return err; 828251881Speter ab->sbuf_len = window->sview_len; 829251881Speter } 830251881Speter 831251881Speter /* Apply the window instructions to the source view to generate 832251881Speter the target view. */ 833251881Speter len = window->tview_len; 834251881Speter svn_txdelta_apply_instructions(window, ab->sbuf, ab->tbuf, &len); 835251881Speter SVN_ERR_ASSERT(len == window->tview_len); 836251881Speter 837251881Speter /* Write out the output. */ 838251881Speter 839251881Speter /* ### We've also considered just adding two (optionally null) 840251881Speter arguments to svn_stream_create(): read_checksum and 841251881Speter write_checksum. Then instead of every caller updating an md5 842251881Speter context when it calls svn_stream_write() or svn_stream_read(), 843251881Speter streams would do it automatically, and verify the checksum in 844251881Speter svn_stream_closed(). 
But this might be overkill for issue #689; 845251881Speter so for now we just update the context here. */ 846251881Speter if (ab->result_digest) 847251881Speter apr_md5_update(&(ab->md5_context), ab->tbuf, len); 848251881Speter 849251881Speter return svn_stream_write(ab->target, ab->tbuf, &len); 850251881Speter} 851251881Speter 852251881Speter 853251881Spetervoid 854251881Spetersvn_txdelta_apply(svn_stream_t *source, 855251881Speter svn_stream_t *target, 856251881Speter unsigned char *result_digest, 857251881Speter const char *error_info, 858251881Speter apr_pool_t *pool, 859251881Speter svn_txdelta_window_handler_t *handler, 860251881Speter void **handler_baton) 861251881Speter{ 862251881Speter apr_pool_t *subpool = svn_pool_create(pool); 863251881Speter struct apply_baton *ab; 864251881Speter 865251881Speter ab = apr_palloc(subpool, sizeof(*ab)); 866251881Speter ab->source = source; 867251881Speter ab->target = target; 868251881Speter ab->pool = subpool; 869251881Speter ab->sbuf = NULL; 870251881Speter ab->sbuf_size = 0; 871251881Speter ab->sbuf_offset = 0; 872251881Speter ab->sbuf_len = 0; 873251881Speter ab->tbuf = NULL; 874251881Speter ab->tbuf_size = 0; 875251881Speter ab->result_digest = result_digest; 876251881Speter 877251881Speter if (result_digest) 878251881Speter apr_md5_init(&(ab->md5_context)); 879251881Speter 880251881Speter if (error_info) 881251881Speter ab->error_info = apr_pstrdup(subpool, error_info); 882251881Speter else 883251881Speter ab->error_info = NULL; 884251881Speter 885251881Speter *handler = apply_window; 886251881Speter *handler_baton = ab; 887251881Speter} 888251881Speter 889251881Speter 890251881Speter 891251881Speter/* Convenience routines */ 892251881Speter 893251881Spetersvn_error_t * 894251881Spetersvn_txdelta_send_string(const svn_string_t *string, 895251881Speter svn_txdelta_window_handler_t handler, 896251881Speter void *handler_baton, 897251881Speter apr_pool_t *pool) 898251881Speter{ 899251881Speter svn_txdelta_window_t 
window = { 0 }; 900251881Speter svn_txdelta_op_t op; 901251881Speter 902251881Speter /* Build a single `new' op */ 903251881Speter op.action_code = svn_txdelta_new; 904251881Speter op.offset = 0; 905251881Speter op.length = string->len; 906251881Speter 907251881Speter /* Build a single window containing a ptr to the string. */ 908251881Speter window.tview_len = string->len; 909251881Speter window.num_ops = 1; 910251881Speter window.ops = &op; 911251881Speter window.new_data = string; 912251881Speter 913251881Speter /* Push the one window at the handler. */ 914251881Speter SVN_ERR((*handler)(&window, handler_baton)); 915251881Speter 916251881Speter /* Push a NULL at the handler, because we're done. */ 917251881Speter return (*handler)(NULL, handler_baton); 918251881Speter} 919251881Speter 920251881Spetersvn_error_t *svn_txdelta_send_stream(svn_stream_t *stream, 921251881Speter svn_txdelta_window_handler_t handler, 922251881Speter void *handler_baton, 923251881Speter unsigned char *digest, 924251881Speter apr_pool_t *pool) 925251881Speter{ 926251881Speter svn_txdelta_window_t delta_window = { 0 }; 927251881Speter svn_txdelta_op_t delta_op; 928251881Speter svn_string_t window_data; 929251881Speter char read_buf[SVN__STREAM_CHUNK_SIZE + 1]; 930251881Speter svn_checksum_ctx_t *md5_checksum_ctx; 931251881Speter 932251881Speter if (digest) 933251881Speter md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5, pool); 934251881Speter 935251881Speter while (1) 936251881Speter { 937251881Speter apr_size_t read_len = SVN__STREAM_CHUNK_SIZE; 938251881Speter 939251881Speter SVN_ERR(svn_stream_read(stream, read_buf, &read_len)); 940251881Speter if (read_len == 0) 941251881Speter break; 942251881Speter 943251881Speter window_data.data = read_buf; 944251881Speter window_data.len = read_len; 945251881Speter 946251881Speter delta_op.action_code = svn_txdelta_new; 947251881Speter delta_op.offset = 0; 948251881Speter delta_op.length = read_len; 949251881Speter 950251881Speter 
delta_window.tview_len = read_len; 951251881Speter delta_window.num_ops = 1; 952251881Speter delta_window.ops = &delta_op; 953251881Speter delta_window.new_data = &window_data; 954251881Speter 955251881Speter SVN_ERR(handler(&delta_window, handler_baton)); 956251881Speter 957251881Speter if (digest) 958251881Speter SVN_ERR(svn_checksum_update(md5_checksum_ctx, read_buf, read_len)); 959251881Speter 960251881Speter if (read_len < SVN__STREAM_CHUNK_SIZE) 961251881Speter break; 962251881Speter } 963251881Speter SVN_ERR(handler(NULL, handler_baton)); 964251881Speter 965251881Speter if (digest) 966251881Speter { 967251881Speter svn_checksum_t *md5_checksum; 968251881Speter 969251881Speter SVN_ERR(svn_checksum_final(&md5_checksum, md5_checksum_ctx, pool)); 970251881Speter memcpy(digest, md5_checksum->digest, APR_MD5_DIGESTSIZE); 971251881Speter } 972251881Speter 973251881Speter return SVN_NO_ERROR; 974251881Speter} 975251881Speter 976251881Spetersvn_error_t *svn_txdelta_send_txstream(svn_txdelta_stream_t *txstream, 977251881Speter svn_txdelta_window_handler_t handler, 978251881Speter void *handler_baton, 979251881Speter apr_pool_t *pool) 980251881Speter{ 981251881Speter svn_txdelta_window_t *window; 982251881Speter 983251881Speter /* create a pool just for the windows */ 984251881Speter apr_pool_t *wpool = svn_pool_create(pool); 985251881Speter 986251881Speter do 987251881Speter { 988251881Speter /* free the window (if any) */ 989251881Speter svn_pool_clear(wpool); 990251881Speter 991251881Speter /* read in a single delta window */ 992251881Speter SVN_ERR(svn_txdelta_next_window(&window, txstream, wpool)); 993251881Speter 994251881Speter /* shove it at the handler */ 995251881Speter SVN_ERR((*handler)(window, handler_baton)); 996251881Speter } 997251881Speter while (window != NULL); 998251881Speter 999251881Speter svn_pool_destroy(wpool); 1000251881Speter 1001251881Speter return SVN_NO_ERROR; 1002251881Speter} 1003251881Speter 1004251881Spetersvn_error_t * 
1005251881Spetersvn_txdelta_send_contents(const unsigned char *contents, 1006251881Speter apr_size_t len, 1007251881Speter svn_txdelta_window_handler_t handler, 1008251881Speter void *handler_baton, 1009251881Speter apr_pool_t *pool) 1010251881Speter{ 1011251881Speter svn_string_t new_data; 1012251881Speter svn_txdelta_op_t op = { svn_txdelta_new, 0, 0 }; 1013251881Speter svn_txdelta_window_t window = { 0, 0, 0, 1, 0 }; 1014251881Speter window.ops = &op; 1015251881Speter window.new_data = &new_data; 1016251881Speter 1017251881Speter /* send CONTENT as a series of max-sized windows */ 1018251881Speter while (len > 0) 1019251881Speter { 1020251881Speter /* stuff next chunk into the window */ 1021251881Speter window.tview_len = len < SVN_DELTA_WINDOW_SIZE 1022251881Speter ? len 1023251881Speter : SVN_DELTA_WINDOW_SIZE; 1024251881Speter op.length = window.tview_len; 1025251881Speter new_data.len = window.tview_len; 1026251881Speter new_data.data = (const char*)contents; 1027251881Speter 1028251881Speter /* update remaining */ 1029251881Speter contents += window.tview_len; 1030251881Speter len -= window.tview_len; 1031251881Speter 1032251881Speter /* shove it at the handler */ 1033251881Speter SVN_ERR((*handler)(&window, handler_baton)); 1034251881Speter } 1035251881Speter 1036251881Speter /* indicate end of stream */ 1037251881Speter SVN_ERR((*handler)(NULL, handler_baton)); 1038251881Speter 1039251881Speter return SVN_NO_ERROR; 1040251881Speter} 1041251881Speter 1042