binary_diff.c revision 362181
1/*
2 * binary_diff.c:  handling of git like binary diffs
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24#include <apr.h>
25
26#include "svn_pools.h"
27#include "svn_error.h"
28#include "svn_diff.h"
29#include "svn_types.h"
30
31#include "diff.h"
32
33#include "svn_private_config.h"
34
35/* Copies the data from ORIGINAL_STREAM to a temporary file, returning both
36   the original and compressed size. */
37static svn_error_t *
38create_compressed(apr_file_t **result,
39                  svn_filesize_t *full_size,
40                  svn_filesize_t *compressed_size,
41                  svn_stream_t *original_stream,
42                  svn_cancel_func_t cancel_func,
43                  void *cancel_baton,
44                  apr_pool_t *result_pool,
45                  apr_pool_t *scratch_pool)
46{
47  svn_stream_t *compressed;
48  svn_filesize_t bytes_read = 0;
49  apr_size_t rd;
50
51  SVN_ERR(svn_io_open_uniquely_named(result, NULL, NULL, "diffgz",
52                                     NULL, svn_io_file_del_on_pool_cleanup,
53                                     result_pool, scratch_pool));
54
55  compressed = svn_stream_compressed(
56                  svn_stream_from_aprfile2(*result, TRUE, scratch_pool),
57                  scratch_pool);
58
59  if (original_stream)
60    do
61    {
62      char buffer[SVN__STREAM_CHUNK_SIZE];
63      rd = sizeof(buffer);
64
65      if (cancel_func)
66        SVN_ERR(cancel_func(cancel_baton));
67
68      SVN_ERR(svn_stream_read_full(original_stream, buffer, &rd));
69
70      bytes_read += rd;
71      SVN_ERR(svn_stream_write(compressed, buffer, &rd));
72    }
73    while(rd == SVN__STREAM_CHUNK_SIZE);
74  else
75    {
76      apr_size_t zero = 0;
77      SVN_ERR(svn_stream_write(compressed, NULL, &zero));
78    }
79
80  SVN_ERR(svn_stream_close(compressed)); /* Flush compression */
81
82  *full_size = bytes_read;
83  SVN_ERR(svn_io_file_size_get(compressed_size, *result, scratch_pool));
84
85  return SVN_NO_ERROR;
86}
87
/* Number of raw bytes that write_literal encodes per output line. */
#define GIT_BASE85_CHUNKSIZE 52

/* The 85-character alphabet of git's base85 encoding: the index of a
   character in this string is its digit value.  Used for encoding by
   write_literal and for decoding by base85_value. */
static const char b85str[] =
    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~";
96
97/* Helper function for svn_diff__base85_decode_line */
98static svn_error_t *
99base85_value(int *value, char c)
100{
101  const char *p = strchr(b85str, c);
102  if (!p)
103    return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL,
104                            _("Invalid base85 value"));
105
106  /* It's safe to cast the ptrdiff_t value of the pointer difference
107     to int because the value will always be in the range [0..84]. */
108  *value = (int)(p - b85str);
109  return SVN_NO_ERROR;
110}
111
112svn_error_t *
113svn_diff__base85_decode_line(char *output_data,
114                             apr_ssize_t output_len,
115                             const char *base85_data,
116                             apr_ssize_t base85_len,
117                             apr_pool_t *scratch_pool)
118{
119  {
120    apr_ssize_t expected_data = (output_len + 3) / 4 * 5;
121
122    if (base85_len != expected_data)
123      return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL,
124                              _("Unexpected base85 line length"));
125  }
126
127  while (base85_len)
128    {
129      unsigned info = 0;
130      apr_ssize_t i, n;
131
132      for (i = 0; i < 5; i++)
133        {
134          int value;
135
136          SVN_ERR(base85_value(&value, base85_data[i]));
137          info *= 85;
138          info += value;
139        }
140
141      for (i = 0, n=24; i < 4; i++, n-=8)
142        {
143          if (i < output_len)
144            output_data[i] = (info >> n) & 0xFF;
145        }
146
147      base85_data += 5;
148      base85_len -= 5;
149      output_data += 4;
150      output_len -= 4;
151    }
152
153  return SVN_NO_ERROR;
154}
155
156
/* Line-length prefix characters used by write_literal: a line carrying
   N bytes of data (1 <= N <= 52) is prefixed with b85lenstr[N - 1]. */
static const char b85lenstr[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
161
162/* Writes out a git-like literal output of the compressed data in
163   COMPRESSED_DATA to OUTPUT_STREAM, describing that its normal length is
164   UNCOMPRESSED_SIZE. */
165static svn_error_t *
166write_literal(svn_filesize_t uncompressed_size,
167              svn_stream_t *compressed_data,
168              svn_stream_t *output_stream,
169              svn_cancel_func_t cancel_func,
170              void *cancel_baton,
171              apr_pool_t *scratch_pool)
172{
173  apr_size_t rd;
174  SVN_ERR(svn_stream_seek(compressed_data, NULL)); /* Seek to start */
175
176  SVN_ERR(svn_stream_printf(output_stream, scratch_pool,
177                            "literal %" SVN_FILESIZE_T_FMT APR_EOL_STR,
178                            uncompressed_size));
179
180  do
181    {
182      char chunk[GIT_BASE85_CHUNKSIZE];
183      const unsigned char *next;
184      apr_size_t left;
185
186      rd = sizeof(chunk);
187
188      if (cancel_func)
189        SVN_ERR(cancel_func(cancel_baton));
190
191      SVN_ERR(svn_stream_read_full(compressed_data, chunk, &rd));
192
193      {
194        apr_size_t one = 1;
195        SVN_ERR(svn_stream_write(output_stream, &b85lenstr[rd-1], &one));
196      }
197
198      left = rd;
199      next = (void*)chunk;
200      while (left)
201      {
202        char five[5];
203        unsigned info = 0;
204        int n;
205        apr_size_t five_sz;
206
207        /* Push 4 bytes into the 32 bit info, when available */
208        for (n = 24; n >= 0 && left; n -= 8, next++, left--)
209        {
210            info |= (*next) << n;
211        }
212
213        /* Write out info as base85 */
214        for (n = 4; n >= 0; n--)
215        {
216            five[n] = b85str[info % 85];
217            info /= 85;
218        }
219
220        five_sz = 5;
221        SVN_ERR(svn_stream_write(output_stream, five, &five_sz));
222      }
223
224      SVN_ERR(svn_stream_puts(output_stream, APR_EOL_STR));
225    }
226  while (rd == GIT_BASE85_CHUNKSIZE);
227
228  return SVN_NO_ERROR;
229}
230
231svn_error_t *
232svn_diff_output_binary(svn_stream_t *output_stream,
233                       svn_stream_t *original,
234                       svn_stream_t *latest,
235                       svn_cancel_func_t cancel_func,
236                       void *cancel_baton,
237                       apr_pool_t *scratch_pool)
238{
239  apr_file_t *original_apr;
240  svn_filesize_t original_full;
241  svn_filesize_t original_deflated;
242  apr_file_t *latest_apr;
243  svn_filesize_t latest_full;
244  svn_filesize_t latest_deflated;
245  apr_pool_t *subpool = svn_pool_create(scratch_pool);
246
247  SVN_ERR(create_compressed(&original_apr, &original_full, &original_deflated,
248                            original, cancel_func, cancel_baton,
249                            scratch_pool, subpool));
250  svn_pool_clear(subpool);
251
252  SVN_ERR(create_compressed(&latest_apr, &latest_full, &latest_deflated,
253                            latest,  cancel_func, cancel_baton,
254                            scratch_pool, subpool));
255  svn_pool_clear(subpool);
256
257  SVN_ERR(svn_stream_puts(output_stream, "GIT binary patch" APR_EOL_STR));
258
259  /* ### git would first calculate if a git-delta latest->original would be
260         shorter than the zipped data. For now lets assume that it is not
261         and just dump the literal data */
262  SVN_ERR(write_literal(latest_full,
263                        svn_stream_from_aprfile2(latest_apr, FALSE, subpool),
264                        output_stream,
265                        cancel_func, cancel_baton,
266                        scratch_pool));
267  svn_pool_clear(subpool);
268  SVN_ERR(svn_stream_puts(output_stream, APR_EOL_STR));
269
270  /* ### git would first calculate if a git-delta original->latest would be
271         shorter than the zipped data. For now lets assume that it is not
272         and just dump the literal data */
273  SVN_ERR(write_literal(original_full,
274                        svn_stream_from_aprfile2(original_apr, FALSE, subpool),
275                        output_stream,
276                        cancel_func, cancel_baton,
277                        scratch_pool));
278  svn_pool_destroy(subpool);
279
280  return SVN_NO_ERROR;
281}
282