binary_diff.c revision 362181
1/* 2 * binary_diff.c: handling of git like binary diffs 3 * 4 * ==================================================================== 5 * Licensed to the Apache Software Foundation (ASF) under one 6 * or more contributor license agreements. See the NOTICE file 7 * distributed with this work for additional information 8 * regarding copyright ownership. The ASF licenses this file 9 * to you under the Apache License, Version 2.0 (the 10 * "License"); you may not use this file except in compliance 11 * with the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, 16 * software distributed under the License is distributed on an 17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 18 * KIND, either express or implied. See the License for the 19 * specific language governing permissions and limitations 20 * under the License. 21 * ==================================================================== 22 */ 23 24#include <apr.h> 25 26#include "svn_pools.h" 27#include "svn_error.h" 28#include "svn_diff.h" 29#include "svn_types.h" 30 31#include "diff.h" 32 33#include "svn_private_config.h" 34 35/* Copies the data from ORIGINAL_STREAM to a temporary file, returning both 36 the original and compressed size. */ 37static svn_error_t * 38create_compressed(apr_file_t **result, 39 svn_filesize_t *full_size, 40 svn_filesize_t *compressed_size, 41 svn_stream_t *original_stream, 42 svn_cancel_func_t cancel_func, 43 void *cancel_baton, 44 apr_pool_t *result_pool, 45 apr_pool_t *scratch_pool) 46{ 47 svn_stream_t *compressed; 48 svn_filesize_t bytes_read = 0; 49 apr_size_t rd; 50 51 SVN_ERR(svn_io_open_uniquely_named(result, NULL, NULL, "diffgz", 52 NULL, svn_io_file_del_on_pool_cleanup, 53 result_pool, scratch_pool)); 54 55 compressed = svn_stream_compressed( 56 svn_stream_from_aprfile2(*result, TRUE, scratch_pool), 57 scratch_pool); 58 59 if (original_stream) 60 do 61 { 62 char buffer[SVN__STREAM_CHUNK_SIZE]; 63 rd = sizeof(buffer); 64 65 if (cancel_func) 66 SVN_ERR(cancel_func(cancel_baton)); 67 68 SVN_ERR(svn_stream_read_full(original_stream, buffer, &rd)); 69 70 bytes_read += rd; 71 SVN_ERR(svn_stream_write(compressed, buffer, &rd)); 72 } 73 while(rd == SVN__STREAM_CHUNK_SIZE); 74 else 75 { 76 apr_size_t zero = 0; 77 SVN_ERR(svn_stream_write(compressed, NULL, &zero)); 78 } 79 80 SVN_ERR(svn_stream_close(compressed)); /* Flush compression */ 81 82 *full_size = bytes_read; 83 SVN_ERR(svn_io_file_size_get(compressed_size, *result, scratch_pool)); 84 85 return SVN_NO_ERROR; 86} 87 88#define GIT_BASE85_CHUNKSIZE 52 89 90/* Git Base-85 table for write_literal */ 91static const char b85str[] = 92 "0123456789" 93 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 94 "abcdefghijklmnopqrstuvwxyz" 95 "!#$%&()*+-;<=>?@^_`{|}~"; 96 97/* Helper function for svn_diff__base85_decode_line */ 98static svn_error_t * 99base85_value(int *value, char c) 100{ 101 const char *p = strchr(b85str, c); 102 if (!p) 103 return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL, 104 _("Invalid base85 value")); 105 106 /* It's safe to cast the ptrdiff_t value of the pointer difference 107 to int because the value will always be in the range [0..84]. */ 108 *value = (int)(p - b85str); 109 return SVN_NO_ERROR; 110} 111 112svn_error_t * 113svn_diff__base85_decode_line(char *output_data, 114 apr_ssize_t output_len, 115 const char *base85_data, 116 apr_ssize_t base85_len, 117 apr_pool_t *scratch_pool) 118{ 119 { 120 apr_ssize_t expected_data = (output_len + 3) / 4 * 5; 121 122 if (base85_len != expected_data) 123 return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL, 124 _("Unexpected base85 line length")); 125 } 126 127 while (base85_len) 128 { 129 unsigned info = 0; 130 apr_ssize_t i, n; 131 132 for (i = 0; i < 5; i++) 133 { 134 int value; 135 136 SVN_ERR(base85_value(&value, base85_data[i])); 137 info *= 85; 138 info += value; 139 } 140 141 for (i = 0, n=24; i < 4; i++, n-=8) 142 { 143 if (i < output_len) 144 output_data[i] = (info >> n) & 0xFF; 145 } 146 147 base85_data += 5; 148 base85_len -= 5; 149 output_data += 4; 150 output_len -= 4; 151 } 152 153 return SVN_NO_ERROR; 154} 155 156 157/* Git length encoding table for write_literal */ 158static const char b85lenstr[] = 159 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 160 "abcdefghijklmnopqrstuvwxyz"; 161 162/* Writes out a git-like literal output of the compressed data in 163 COMPRESSED_DATA to OUTPUT_STREAM, describing that its normal length is 164 UNCOMPRESSED_SIZE. */ 165static svn_error_t * 166write_literal(svn_filesize_t uncompressed_size, 167 svn_stream_t *compressed_data, 168 svn_stream_t *output_stream, 169 svn_cancel_func_t cancel_func, 170 void *cancel_baton, 171 apr_pool_t *scratch_pool) 172{ 173 apr_size_t rd; 174 SVN_ERR(svn_stream_seek(compressed_data, NULL)); /* Seek to start */ 175 176 SVN_ERR(svn_stream_printf(output_stream, scratch_pool, 177 "literal %" SVN_FILESIZE_T_FMT APR_EOL_STR, 178 uncompressed_size)); 179 180 do 181 { 182 char chunk[GIT_BASE85_CHUNKSIZE]; 183 const unsigned char *next; 184 apr_size_t left; 185 186 rd = sizeof(chunk); 187 188 if (cancel_func) 189 SVN_ERR(cancel_func(cancel_baton)); 190 191 SVN_ERR(svn_stream_read_full(compressed_data, chunk, &rd)); 192 193 { 194 apr_size_t one = 1; 195 SVN_ERR(svn_stream_write(output_stream, &b85lenstr[rd-1], &one)); 196 } 197 198 left = rd; 199 next = (void*)chunk; 200 while (left) 201 { 202 char five[5]; 203 unsigned info = 0; 204 int n; 205 apr_size_t five_sz; 206 207 /* Push 4 bytes into the 32 bit info, when available */ 208 for (n = 24; n >= 0 && left; n -= 8, next++, left--) 209 { 210 info |= (*next) << n; 211 } 212 213 /* Write out info as base85 */ 214 for (n = 4; n >= 0; n--) 215 { 216 five[n] = b85str[info % 85]; 217 info /= 85; 218 } 219 220 five_sz = 5; 221 SVN_ERR(svn_stream_write(output_stream, five, &five_sz)); 222 } 223 224 SVN_ERR(svn_stream_puts(output_stream, APR_EOL_STR)); 225 } 226 while (rd == GIT_BASE85_CHUNKSIZE); 227 228 return SVN_NO_ERROR; 229} 230 231svn_error_t * 232svn_diff_output_binary(svn_stream_t *output_stream, 233 svn_stream_t *original, 234 svn_stream_t *latest, 235 svn_cancel_func_t cancel_func, 236 void *cancel_baton, 237 apr_pool_t *scratch_pool) 238{ 239 apr_file_t *original_apr; 240 svn_filesize_t original_full; 241 svn_filesize_t original_deflated; 242 apr_file_t *latest_apr; 243 svn_filesize_t latest_full; 244 svn_filesize_t latest_deflated; 245 apr_pool_t *subpool = svn_pool_create(scratch_pool); 246 247 SVN_ERR(create_compressed(&original_apr, &original_full, &original_deflated, 248 original, cancel_func, cancel_baton, 249 scratch_pool, subpool)); 250 svn_pool_clear(subpool); 251 252 SVN_ERR(create_compressed(&latest_apr, &latest_full, &latest_deflated, 253 latest, cancel_func, cancel_baton, 254 scratch_pool, subpool)); 255 svn_pool_clear(subpool); 256 257 SVN_ERR(svn_stream_puts(output_stream, "GIT binary patch" APR_EOL_STR)); 258 259 /* ### git would first calculate if a git-delta latest->original would be 260 shorter than the zipped data. For now lets assume that it is not 261 and just dump the literal data */ 262 SVN_ERR(write_literal(latest_full, 263 svn_stream_from_aprfile2(latest_apr, FALSE, subpool), 264 output_stream, 265 cancel_func, cancel_baton, 266 scratch_pool)); 267 svn_pool_clear(subpool); 268 SVN_ERR(svn_stream_puts(output_stream, APR_EOL_STR)); 269 270 /* ### git would first calculate if a git-delta original->latest would be 271 shorter than the zipped data. For now lets assume that it is not 272 and just dump the literal data */ 273 SVN_ERR(write_literal(original_full, 274 svn_stream_from_aprfile2(original_apr, FALSE, subpool), 275 output_stream, 276 cancel_func, cancel_baton, 277 scratch_pool)); 278 svn_pool_destroy(subpool); 279 280 return SVN_NO_ERROR; 281} 282