1/*
2 * binary_diff.c:  handling of git like binary diffs
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24#include <apr.h>
25
26#include "svn_pools.h"
27#include "svn_error.h"
28#include "svn_diff.h"
29#include "svn_types.h"
30
31/* Copies the data from ORIGINAL_STREAM to a temporary file, returning both
32   the original and compressed size. */
33static svn_error_t *
34create_compressed(apr_file_t **result,
35                  svn_filesize_t *full_size,
36                  svn_filesize_t *compressed_size,
37                  svn_stream_t *original_stream,
38                  svn_cancel_func_t cancel_func,
39                  void *cancel_baton,
40                  apr_pool_t *result_pool,
41                  apr_pool_t *scratch_pool)
42{
43  svn_stream_t *compressed;
44  svn_filesize_t bytes_read = 0;
45  apr_finfo_t finfo;
46  apr_size_t rd;
47
48  SVN_ERR(svn_io_open_uniquely_named(result, NULL, NULL, "diffgz",
49                                     NULL, svn_io_file_del_on_pool_cleanup,
50                                     result_pool, scratch_pool));
51
52  compressed = svn_stream_compressed(
53                  svn_stream_from_aprfile2(*result, TRUE, scratch_pool),
54                  scratch_pool);
55
56  if (original_stream)
57    do
58    {
59      char buffer[SVN_STREAM_CHUNK_SIZE];
60      rd = sizeof(buffer);
61
62      if (cancel_func)
63        SVN_ERR(cancel_func(cancel_baton));
64
65      SVN_ERR(svn_stream_read_full(original_stream, buffer, &rd));
66
67      bytes_read += rd;
68      SVN_ERR(svn_stream_write(compressed, buffer, &rd));
69    }
70    while(rd == SVN_STREAM_CHUNK_SIZE);
71  else
72    {
73      apr_size_t zero = 0;
74      SVN_ERR(svn_stream_write(compressed, NULL, &zero));
75    }
76
77  SVN_ERR(svn_stream_close(compressed)); /* Flush compression */
78
79  *full_size = bytes_read;
80  SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, *result, scratch_pool));
81  *compressed_size = finfo.size;
82
83  return SVN_NO_ERROR;
84}
85
/* Maximum number of (compressed) bytes that write_literal encodes on one
   output line.  This equals the number of entries in b85lenstr below, which
   is what allows every possible line length 1..52 to be written as a single
   letter. */
#define GIT_BASE85_CHUNKSIZE 52

/* Git Base-85 table for write_literal: entry I is the output character for
   the base-85 digit with value I (85 characters in total: 10 digits,
   26 upper case letters, 26 lower case letters and 23 symbols). */
static const char b85str[] =
    "0123456789"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "!#$%&()*+-;<=>?@^_`{|}~";

/* Git length encoding table for write_literal: a line carrying N bytes is
   prefixed with entry N-1 of this table, i.e. 'A'..'Z' stand for 1..26
   bytes and 'a'..'z' for 27..52 bytes. */
static const char b85lenstr[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz";
99
100/* Writes out a git-like literal output of the compressed data in
101   COMPRESSED_DATA to OUTPUT_STREAM, describing that its normal length is
102   UNCOMPRESSED_SIZE. */
103static svn_error_t *
104write_literal(svn_filesize_t uncompressed_size,
105              svn_stream_t *compressed_data,
106              svn_stream_t *output_stream,
107              svn_cancel_func_t cancel_func,
108              void *cancel_baton,
109              apr_pool_t *scratch_pool)
110{
111  apr_size_t rd;
112  SVN_ERR(svn_stream_seek(compressed_data, NULL)); /* Seek to start */
113
114  SVN_ERR(svn_stream_printf(output_stream, scratch_pool,
115                            "literal %" SVN_FILESIZE_T_FMT APR_EOL_STR,
116                            uncompressed_size));
117
118  do
119    {
120      char chunk[GIT_BASE85_CHUNKSIZE];
121      const unsigned char *next;
122      apr_size_t left;
123
124      rd = sizeof(chunk);
125
126      if (cancel_func)
127        SVN_ERR(cancel_func(cancel_baton));
128
129      SVN_ERR(svn_stream_read_full(compressed_data, chunk, &rd));
130
131      {
132        apr_size_t one = 1;
133        SVN_ERR(svn_stream_write(output_stream, &b85lenstr[rd-1], &one));
134      }
135
136      left = rd;
137      next = (void*)chunk;
138      while (left)
139      {
140        char five[5];
141        unsigned info = 0;
142        int n;
143        apr_size_t five_sz;
144
145        /* Push 4 bytes into the 32 bit info, when available */
146        for (n = 24; n >= 0 && left; n -= 8, next++, left--)
147        {
148            info |= (*next) << n;
149        }
150
151        /* Write out info as base85 */
152        for (n = 4; n >= 0; n--)
153        {
154            five[n] = b85str[info % 85];
155            info /= 85;
156        }
157
158        five_sz = 5;
159        SVN_ERR(svn_stream_write(output_stream, five, &five_sz));
160      }
161
162      SVN_ERR(svn_stream_puts(output_stream, APR_EOL_STR));
163    }
164  while (rd == GIT_BASE85_CHUNKSIZE);
165
166  return SVN_NO_ERROR;
167}
168
169svn_error_t *
170svn_diff_output_binary(svn_stream_t *output_stream,
171                       svn_stream_t *original,
172                       svn_stream_t *latest,
173                       svn_cancel_func_t cancel_func,
174                       void *cancel_baton,
175                       apr_pool_t *scratch_pool)
176{
177  apr_file_t *original_apr;
178  svn_filesize_t original_full;
179  svn_filesize_t original_deflated;
180  apr_file_t *latest_apr;
181  svn_filesize_t latest_full;
182  svn_filesize_t latest_deflated;
183  apr_pool_t *subpool = svn_pool_create(scratch_pool);
184
185  SVN_ERR(create_compressed(&original_apr, &original_full, &original_deflated,
186                            original, cancel_func, cancel_baton,
187                            scratch_pool, subpool));
188  svn_pool_clear(subpool);
189
190  SVN_ERR(create_compressed(&latest_apr, &latest_full, &latest_deflated,
191                            latest,  cancel_func, cancel_baton,
192                            scratch_pool, subpool));
193  svn_pool_clear(subpool);
194
195  SVN_ERR(svn_stream_puts(output_stream, "GIT binary patch" APR_EOL_STR));
196
197  /* ### git would first calculate if a git-delta latest->original would be
198         shorter than the zipped data. For now lets assume that it is not
199         and just dump the literal data */
200  SVN_ERR(write_literal(latest_full,
201                        svn_stream_from_aprfile2(latest_apr, FALSE, subpool),
202                        output_stream,
203                        cancel_func, cancel_baton,
204                        scratch_pool));
205  svn_pool_clear(subpool);
206  SVN_ERR(svn_stream_puts(output_stream, APR_EOL_STR));
207
208  /* ### git would first calculate if a git-delta original->latest would be
209         shorter than the zipped data. For now lets assume that it is not
210         and just dump the literal data */
211  SVN_ERR(write_literal(original_full,
212                        svn_stream_from_aprfile2(original_apr, FALSE, subpool),
213                        output_stream,
214                        cancel_func, cancel_baton,
215                        scratch_pool));
216  svn_pool_destroy(subpool);
217
218  SVN_ERR(svn_stream_puts(output_stream, APR_EOL_STR));
219
220  return SVN_NO_ERROR;
221}
222