1/*
2 * svndumpfilter.c: Subversion dump stream filtering tool main file.
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
25#include <stdlib.h>
26
27#include <apr_file_io.h>
28
29#include "svn_private_config.h"
30#include "svn_cmdline.h"
31#include "svn_error.h"
32#include "svn_string.h"
33#include "svn_opt.h"
34#include "svn_utf.h"
35#include "svn_dirent_uri.h"
36#include "svn_path.h"
37#include "svn_hash.h"
38#include "svn_repos.h"
39#include "svn_fs.h"
40#include "svn_pools.h"
41#include "svn_sorts.h"
42#include "svn_props.h"
43#include "svn_mergeinfo.h"
44#include "svn_version.h"
45
46#include "private/svn_mergeinfo_private.h"
47#include "private/svn_cmdline_private.h"
48#include "private/svn_subr_private.h"
49
/* Pointer to a function that opens a file as an apr_file_t allocated in
   the given pool and reports the outcome as an apr_status_t.  This is
   the callback type create_stdio_stream() takes.  On Windows the
   pointed-to function is declared with the __stdcall calling convention;
   elsewhere the platform default applies. */
#ifdef _WIN32
typedef apr_status_t (__stdcall *open_fn_t)(apr_file_t **, apr_pool_t *);
#else
typedef apr_status_t (*open_fn_t)(apr_file_t **, apr_pool_t *);
#endif
55
56/*** Code. ***/
57
58/* Helper to open stdio streams */
59
60/* NOTE: we used to call svn_stream_from_stdio(), which wraps a stream
61   around a standard stdio.h FILE pointer.  The problem is that these
62   pointers operate through C Run Time (CRT) on Win32, which does all
63   sorts of translation on them: LF's become CRLF's, and ctrl-Z's
64   embedded in Word documents are interpreted as premature EOF's.
65
66   So instead, we use apr_file_open_std*, which bypass the CRT and
67   directly wrap the OS's file-handles, which don't know or care about
68   translation.  Thus dump/load works correctly on Win32.
69*/
70static svn_error_t *
71create_stdio_stream(svn_stream_t **stream,
72                    open_fn_t open_fn,
73                    apr_pool_t *pool)
74{
75  apr_file_t *stdio_file;
76  apr_status_t apr_err = open_fn(&stdio_file, pool);
77
78  if (apr_err)
79    return svn_error_wrap_apr(apr_err, _("Can't open stdio file"));
80
81  *stream = svn_stream_from_aprfile2(stdio_file, TRUE, pool);
82  return SVN_NO_ERROR;
83}
84
85
86/* Writes a property in dumpfile format to given stringbuf. */
87static void
88write_prop_to_stringbuf(svn_stringbuf_t *strbuf,
89                        const char *name,
90                        const svn_string_t *value)
91{
92  int bytes_used;
93  size_t namelen;
94  char buf[SVN_KEYLINE_MAXLEN];
95
96  /* Output name length, then name. */
97  namelen = strlen(name);
98  svn_stringbuf_appendbytes(strbuf, "K ", 2);
99
100  bytes_used = apr_snprintf(buf, sizeof(buf), "%" APR_SIZE_T_FMT, namelen);
101  svn_stringbuf_appendbytes(strbuf, buf, bytes_used);
102  svn_stringbuf_appendbyte(strbuf, '\n');
103
104  svn_stringbuf_appendbytes(strbuf, name, namelen);
105  svn_stringbuf_appendbyte(strbuf, '\n');
106
107  /* Output value length, then value. */
108  svn_stringbuf_appendbytes(strbuf, "V ", 2);
109
110  bytes_used = apr_snprintf(buf, sizeof(buf), "%" APR_SIZE_T_FMT, value->len);
111  svn_stringbuf_appendbytes(strbuf, buf, bytes_used);
112  svn_stringbuf_appendbyte(strbuf, '\n');
113
114  svn_stringbuf_appendbytes(strbuf, value->data, value->len);
115  svn_stringbuf_appendbyte(strbuf, '\n');
116}
117
118
119/* Writes a property deletion in dumpfile format to given stringbuf. */
120static void
121write_propdel_to_stringbuf(svn_stringbuf_t **strbuf,
122                           const char *name)
123{
124  int bytes_used;
125  size_t namelen;
126  char buf[SVN_KEYLINE_MAXLEN];
127
128  /* Output name length, then name. */
129  namelen = strlen(name);
130  svn_stringbuf_appendbytes(*strbuf, "D ", 2);
131
132  bytes_used = apr_snprintf(buf, sizeof(buf), "%" APR_SIZE_T_FMT, namelen);
133  svn_stringbuf_appendbytes(*strbuf, buf, bytes_used);
134  svn_stringbuf_appendbyte(*strbuf, '\n');
135
136  svn_stringbuf_appendbytes(*strbuf, name, namelen);
137  svn_stringbuf_appendbyte(*strbuf, '\n');
138}
139
140
141/* Compare the node-path PATH with the (const char *) prefixes in PFXLIST.
142 * Return TRUE if any prefix is a prefix of PATH (matching whole path
143 * components); FALSE otherwise.
144 * PATH starts with a '/', as do the (const char *) paths in PREFIXES. */
145static svn_boolean_t
146ary_prefix_match(const apr_array_header_t *pfxlist, const char *path)
147{
148  int i;
149  size_t path_len = strlen(path);
150
151  for (i = 0; i < pfxlist->nelts; i++)
152    {
153      const char *pfx = APR_ARRAY_IDX(pfxlist, i, const char *);
154      size_t pfx_len = strlen(pfx);
155
156      if (path_len < pfx_len)
157        continue;
158      if (strncmp(path, pfx, pfx_len) == 0
159          && (pfx_len == 1 || path[pfx_len] == '\0' || path[pfx_len] == '/'))
160        return TRUE;
161    }
162
163  return FALSE;
164}
165
166
167/* Check whether we need to skip this PATH based on its presence in
168   the PREFIXES list, and the DO_EXCLUDE option.
169   PATH starts with a '/', as do the (const char *) paths in PREFIXES. */
170static APR_INLINE svn_boolean_t
171skip_path(const char *path, const apr_array_header_t *prefixes,
172          svn_boolean_t do_exclude, svn_boolean_t glob)
173{
174  const svn_boolean_t matches =
175    (glob
176     ? svn_cstring_match_glob_list(path, prefixes)
177     : ary_prefix_match(prefixes, path));
178
179  /* NXOR */
180  return (matches ? do_exclude : !do_exclude);
181}
182
183
184
185/* Note: the input stream parser calls us with events.
186   Output of the filtered dump occurs for the most part streamily with the
187   event callbacks, to avoid caching large quantities of data in memory.
   The exceptions to this are:
189   - All revision data (headers and props) must be cached until a non-skipped
190     node within the revision is found, or the revision is closed.
191   - Node headers and props must be cached until all props have been received
192     (to allow the Prop-content-length to be found). This is signalled either
193     by the node text arriving, or the node being closed.
194   The writing_begun members of the associated object batons track the state.
195   output_revision() and output_node() are called to cause this flushing of
196   cached data to occur.
197*/
198
199
200/* Filtering batons */
201
/* Value type of parse_baton_t.renumber_history: what one original
   revision number turned into in the filtered output. */
struct revmap_t
{
  /* For a kept revision, its number in the output.  For a dropped
     revision, the output number of the last revision that was kept
     before it. */
  svn_revnum_t rev; /* Last non-dropped revision to which this maps. */
  svn_boolean_t was_dropped; /* Was this revision dropped? */
};
207
/* State shared by the whole filtering run: the option values, the
   streams, and the bookkeeping that spans revisions. */
struct parse_baton_t
{
  /* Command-line options values. */
  /* TRUE: paths matching PREFIXES are skipped; FALSE: paths NOT
     matching are skipped (see skip_path()). */
  svn_boolean_t do_exclude;
  /* Suppress the per-revision progress messages on stderr. */
  svn_boolean_t quiet;
  /* Match PREFIXES as glob patterns instead of path prefixes. */
  svn_boolean_t glob;
  /* Drop revisions that ended up without nodes because their nodes
     were filtered out. */
  svn_boolean_t drop_empty_revs;
  /* Drop every revision without nodes (r0 is still written). */
  svn_boolean_t drop_all_empty_revs;
  /* Number output revisions consecutively, closing the gaps left by
     dropped revisions. */
  svn_boolean_t do_renumber_revs;
  /* Keep the original revprops on revisions emptied by filtering
     instead of replacing them with date + padding log message. */
  svn_boolean_t preserve_revprops;
  /* Silently remove filtered-out merge sources from mergeinfo instead
     of failing (see adjust_mergeinfo()). */
  svn_boolean_t skip_missing_merge_sources;
  /* Set by magic_header_record() when the dump format version is one
     that supports deltas. */
  svn_boolean_t allow_deltas;
  /* Array of (const char *) paths, each starting with '/'. */
  apr_array_header_t *prefixes;

  /* Input and output streams. */
  svn_stream_t *in_stream;
  svn_stream_t *out_stream;

  /* State for the filtering process. */
  /* Number of revisions dropped so far. */
  apr_int32_t rev_drop_count;
  /* Set of skipped node paths: path -> (void *)1. */
  apr_hash_t *dropped_nodes;
  apr_hash_t *renumber_history;  /* svn_revnum_t -> struct revmap_t */
  /* Output number of the most recently written revision; only
     maintained when DO_RENUMBER_REVS is set. */
  svn_revnum_t last_live_revision;
  /* The oldest original revision, greater than r0, in the input
     stream which was not filtered. */
  svn_revnum_t oldest_original_rev;
};
235
/* Per-revision state.  Created by new_revision_record(); the header and
   props collected here are held back until output_revision() decides
   whether the revision is written or dropped. */
struct revision_baton_t
{
  /* Reference to the global parse baton. */
  struct parse_baton_t *pb;

  /* Does this revision have node or prop changes? */
  /* HAS_NODES is set once a non-skipped node is seen. */
  svn_boolean_t has_nodes;
  /* HAS_PROPS is set once a revision property is seen (or when the
     padding props are substituted in output_revision()). */
  svn_boolean_t has_props;

  /* Did we drop any nodes? */
  svn_boolean_t had_dropped_nodes;

  /* Written to output stream? */
  /* Set by output_revision(), whether or not the revision was kept. */
  svn_boolean_t writing_begun;

  /* The original and new (re-mapped) revision numbers. */
  svn_revnum_t rev_orig;
  svn_revnum_t rev_actual;

  /* Pointers to dumpfile data. */
  /* Header lines accumulated so far; the content-length headers are
     appended by output_revision(). */
  svn_stringbuf_t *header;
  /* Revision properties: (const char *) name -> (svn_string_t *). */
  apr_hash_t *props;
};
259
/* Per-node state.  Created by new_node_record().

   NOTE: for a skipped node (DO_SKIP is TRUE) new_node_record() fills in
   only RB and DO_SKIP; every other member is left uninitialized, so
   DO_SKIP must be checked before anything else is read. */
struct node_baton_t
{
  /* Reference to the current revision baton. */
  struct revision_baton_t *rb;

  /* Are we skipping this node? */
  svn_boolean_t do_skip;

  /* Have we been instructed to change or remove props on, or change
     the text of, this node? */
  svn_boolean_t has_props;
  svn_boolean_t has_text;

  /* Written to output stream? */
  /* Set by output_node(). */
  svn_boolean_t writing_begun;

  /* The text content length according to the dumpfile headers, because we
     need the length before we have the actual text. */
  /* Zero when the node has no Text-content-length header. */
  svn_filesize_t tcl;

  /* Pointers to dumpfile data. */
  /* Content-length headers built by output_node(). */
  svn_stringbuf_t *header;
  /* Property block in dumpfile format. */
  svn_stringbuf_t *props;

  /* Expect deltas? */
  /* Set from the node's Prop-delta / Text-delta headers when their
     value is "true". */
  svn_boolean_t has_prop_delta;
  svn_boolean_t has_text_delta;

  /* We might need the node path in a parse error message. */
  /* Stored with its leading '/'. */
  char *node_path;
};
291
292
293
294/* Filtering vtable members */
295
296/* File-format stamp. */
297static svn_error_t *
298magic_header_record(int version, void *parse_baton, apr_pool_t *pool)
299{
300  struct parse_baton_t *pb = parse_baton;
301
302  if (version >= SVN_REPOS_DUMPFILE_FORMAT_VERSION_DELTAS)
303    pb->allow_deltas = TRUE;
304
305  SVN_ERR(svn_stream_printf(pb->out_stream, pool,
306                            SVN_REPOS_DUMPFILE_MAGIC_HEADER ": %d\n\n",
307                            version));
308
309  return SVN_NO_ERROR;
310}
311
312
313/* New revision: set up revision_baton, decide if we skip it. */
314static svn_error_t *
315new_revision_record(void **revision_baton,
316                    apr_hash_t *headers,
317                    void *parse_baton,
318                    apr_pool_t *pool)
319{
320  struct revision_baton_t *rb;
321  apr_hash_index_t *hi;
322  const char *rev_orig;
323  svn_stream_t *header_stream;
324
325  *revision_baton = apr_palloc(pool, sizeof(struct revision_baton_t));
326  rb = *revision_baton;
327  rb->pb = parse_baton;
328  rb->has_nodes = FALSE;
329  rb->has_props = FALSE;
330  rb->had_dropped_nodes = FALSE;
331  rb->writing_begun = FALSE;
332  rb->header = svn_stringbuf_create_empty(pool);
333  rb->props = apr_hash_make(pool);
334
335  header_stream = svn_stream_from_stringbuf(rb->header, pool);
336
337  rev_orig = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_REVISION_NUMBER);
338  rb->rev_orig = SVN_STR_TO_REV(rev_orig);
339
340  if (rb->pb->do_renumber_revs)
341    rb->rev_actual = rb->rev_orig - rb->pb->rev_drop_count;
342  else
343    rb->rev_actual = rb->rev_orig;
344
345  SVN_ERR(svn_stream_printf(header_stream, pool,
346                            SVN_REPOS_DUMPFILE_REVISION_NUMBER ": %ld\n",
347                            rb->rev_actual));
348
349  for (hi = apr_hash_first(pool, headers); hi; hi = apr_hash_next(hi))
350    {
351      const char *key = svn__apr_hash_index_key(hi);
352      const char *val = svn__apr_hash_index_val(hi);
353
354      if ((!strcmp(key, SVN_REPOS_DUMPFILE_CONTENT_LENGTH))
355          || (!strcmp(key, SVN_REPOS_DUMPFILE_PROP_CONTENT_LENGTH))
356          || (!strcmp(key, SVN_REPOS_DUMPFILE_REVISION_NUMBER)))
357        continue;
358
359      /* passthru: put header into header stringbuf. */
360
361      SVN_ERR(svn_stream_printf(header_stream, pool, "%s: %s\n",
362                                key, val));
363    }
364
365  SVN_ERR(svn_stream_close(header_stream));
366
367  return SVN_NO_ERROR;
368}
369
370
/* Output revision to dumpstream
   This may be called by new_node_record(), iff rb->has_nodes has been set
   to TRUE, or by close_revision() otherwise. This must only be called
   if rb->writing_begun is FALSE.

   Finishes the header block cached in RB, serializes the revision
   properties, and then either writes both to the output stream or drops
   the revision, depending on the options in RB->pb.  Either way
   RB->writing_begun is set, and when renumbering is active the outcome
   is recorded in RB->pb->renumber_history.  Unless RB->pb->quiet is set,
   a one-line message about the outcome goes to stderr. */
static svn_error_t *
output_revision(struct revision_baton_t *rb)
{
  int bytes_used;
  char buf[SVN_KEYLINE_MAXLEN];
  apr_hash_index_t *hi;
  svn_boolean_t write_out_rev = FALSE;
  /* Allocate alongside the props hash so the data lives as long as RB's
     own data does. */
  apr_pool_t *hash_pool = apr_hash_pool_get(rb->props);
  /* Serialized property block. */
  svn_stringbuf_t *props = svn_stringbuf_create_empty(hash_pool);
  /* Scratch pool for hash iteration and message output.
     NOTE(review): the SVN_ERR() early returns below skip the explicit
     svn_pool_destroy(); presumably the pool is reclaimed together with
     its parent HASH_POOL -- confirm. */
  apr_pool_t *subpool = svn_pool_create(hash_pool);

  rb->writing_begun = TRUE;

  /* If this revision has no nodes left because the ones it had were
     dropped, and we are not dropping empty revisions, and we were not
     told to preserve revision props, then we want to fixup the
     revision props to only contain:
       - the date
       - a log message that reports that this revision is just stuffing. */
  if ((! rb->pb->preserve_revprops)
      && (! rb->has_nodes)
      && rb->had_dropped_nodes
      && (! rb->pb->drop_empty_revs)
      && (! rb->pb->drop_all_empty_revs))
    {
      apr_hash_t *old_props = rb->props;
      /* Force the property block to be written even if the original
         revision carried no properties. */
      rb->has_props = TRUE;
      rb->props = apr_hash_make(hash_pool);
      svn_hash_sets(rb->props, SVN_PROP_REVISION_DATE,
                    svn_hash_gets(old_props, SVN_PROP_REVISION_DATE));
      svn_hash_sets(rb->props, SVN_PROP_REVISION_LOG,
                    svn_string_create(_("This is an empty revision for "
                                        "padding."), hash_pool));
    }

  /* Now, "rasterize" the props to a string, and append the property
     information to the header string.  */
  if (rb->has_props)
    {
      for (hi = apr_hash_first(subpool, rb->props);
           hi;
           hi = apr_hash_next(hi))
        {
          const char *pname = svn__apr_hash_index_key(hi);
          const svn_string_t *pval = svn__apr_hash_index_val(hi);

          write_prop_to_stringbuf(props, pname, pval);
        }
      svn_stringbuf_appendcstr(props, "PROPS-END\n");
      svn_stringbuf_appendcstr(rb->header,
                               SVN_REPOS_DUMPFILE_PROP_CONTENT_LENGTH);
      bytes_used = apr_snprintf(buf, sizeof(buf), ": %" APR_SIZE_T_FMT,
                                props->len);
      svn_stringbuf_appendbytes(rb->header, buf, bytes_used);
      svn_stringbuf_appendbyte(rb->header, '\n');
    }

  /* The only content of a revision record is its property block, so
     Content-length is the length of PROPS (zero without properties). */
  svn_stringbuf_appendcstr(rb->header, SVN_REPOS_DUMPFILE_CONTENT_LENGTH);
  bytes_used = apr_snprintf(buf, sizeof(buf), ": %" APR_SIZE_T_FMT, props->len);
  svn_stringbuf_appendbytes(rb->header, buf, bytes_used);
  svn_stringbuf_appendbyte(rb->header, '\n');

  /* put an end to headers */
  svn_stringbuf_appendbyte(rb->header, '\n');

  /* put an end to revision */
  /* Appended only after the lengths above were computed, so this
     newline is not counted in them. */
  svn_stringbuf_appendbyte(props, '\n');

  /* write out the revision */
  /* Revision is written out in the following cases:
     1. If the revision has nodes or
     it is revision 0 (Special case: To preserve the props on r0).
     2. --drop-empty-revs has been supplied,
     but revision has not all nodes dropped.
     3. If no --drop-empty-revs or --drop-all-empty-revs have been supplied,
     write out the revision which has no nodes to begin with.
  */
  if (rb->has_nodes || (rb->rev_orig == 0))
    write_out_rev = TRUE;
  else if (rb->pb->drop_empty_revs)
    write_out_rev = ! rb->had_dropped_nodes;
  else if (! rb->pb->drop_all_empty_revs)
    write_out_rev = TRUE;

  if (write_out_rev)
    {
      /* This revision is a keeper. */
      SVN_ERR(svn_stream_write(rb->pb->out_stream,
                               rb->header->data, &(rb->header->len)));
      SVN_ERR(svn_stream_write(rb->pb->out_stream,
                               props->data, &(props->len)));

      /* Stash the oldest original rev not dropped. */
      if (rb->rev_orig > 0
          && !SVN_IS_VALID_REVNUM(rb->pb->oldest_original_rev))
        rb->pb->oldest_original_rev = rb->rev_orig;

      if (rb->pb->do_renumber_revs)
        {
          /* Record original -> new number.  Key and value are allocated
             in the history hash's own pool so they live as long as the
             hash does. */
          svn_revnum_t *rr_key;
          struct revmap_t *rr_val;
          apr_pool_t *rr_pool = apr_hash_pool_get(rb->pb->renumber_history);
          rr_key = apr_palloc(rr_pool, sizeof(*rr_key));
          rr_val = apr_palloc(rr_pool, sizeof(*rr_val));
          *rr_key = rb->rev_orig;
          rr_val->rev = rb->rev_actual;
          rr_val->was_dropped = FALSE;
          apr_hash_set(rb->pb->renumber_history, rr_key,
                       sizeof(*rr_key), rr_val);
          rb->pb->last_live_revision = rb->rev_actual;
        }

      if (! rb->pb->quiet)
        SVN_ERR(svn_cmdline_fprintf(stderr, subpool,
                                    _("Revision %ld committed as %ld.\n"),
                                    rb->rev_orig, rb->rev_actual));
    }
  else
    {
      /* We're dropping this revision. */
      rb->pb->rev_drop_count++;
      if (rb->pb->do_renumber_revs)
        {
          /* Map the dropped revision onto the last revision that was
             actually written, and flag it as dropped. */
          svn_revnum_t *rr_key;
          struct revmap_t *rr_val;
          apr_pool_t *rr_pool = apr_hash_pool_get(rb->pb->renumber_history);
          rr_key = apr_palloc(rr_pool, sizeof(*rr_key));
          rr_val = apr_palloc(rr_pool, sizeof(*rr_val));
          *rr_key = rb->rev_orig;
          rr_val->rev = rb->pb->last_live_revision;
          rr_val->was_dropped = TRUE;
          apr_hash_set(rb->pb->renumber_history, rr_key,
                       sizeof(*rr_key), rr_val);
        }

      if (! rb->pb->quiet)
        SVN_ERR(svn_cmdline_fprintf(stderr, subpool,
                                    _("Revision %ld skipped.\n"),
                                    rb->rev_orig));
    }
  svn_pool_destroy(subpool);
  return SVN_NO_ERROR;
}
518
519
520/* UUID record here: dump it, as we do not filter them. */
521static svn_error_t *
522uuid_record(const char *uuid, void *parse_baton, apr_pool_t *pool)
523{
524  struct parse_baton_t *pb = parse_baton;
525  SVN_ERR(svn_stream_printf(pb->out_stream, pool,
526                            SVN_REPOS_DUMPFILE_UUID ": %s\n\n", uuid));
527  return SVN_NO_ERROR;
528}
529
530
531/* New node here. Set up node_baton by copying headers. */
532static svn_error_t *
533new_node_record(void **node_baton,
534                apr_hash_t *headers,
535                void *rev_baton,
536                apr_pool_t *pool)
537{
538  struct parse_baton_t *pb;
539  struct node_baton_t *nb;
540  char *node_path, *copyfrom_path;
541  apr_hash_index_t *hi;
542  const char *tcl;
543
544  *node_baton = apr_palloc(pool, sizeof(struct node_baton_t));
545  nb          = *node_baton;
546  nb->rb      = rev_baton;
547  pb          = nb->rb->pb;
548
549  node_path = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_PATH);
550  copyfrom_path = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_COPYFROM_PATH);
551
552  /* Ensure that paths start with a leading '/'. */
553  if (node_path[0] != '/')
554    node_path = apr_pstrcat(pool, "/", node_path, (char *)NULL);
555  if (copyfrom_path && copyfrom_path[0] != '/')
556    copyfrom_path = apr_pstrcat(pool, "/", copyfrom_path, (char *)NULL);
557
558  nb->do_skip = skip_path(node_path, pb->prefixes,
559                          pb->do_exclude, pb->glob);
560
561  /* If we're skipping the node, take note of path, discarding the
562     rest.  */
563  if (nb->do_skip)
564    {
565      svn_hash_sets(pb->dropped_nodes,
566                    apr_pstrdup(apr_hash_pool_get(pb->dropped_nodes),
567                                node_path),
568                    (void *)1);
569      nb->rb->had_dropped_nodes = TRUE;
570    }
571  else
572    {
573      const char *kind;
574      const char *action;
575
576      tcl = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_TEXT_CONTENT_LENGTH);
577
578      /* Test if this node was copied from dropped source. */
579      if (copyfrom_path &&
580          skip_path(copyfrom_path, pb->prefixes, pb->do_exclude, pb->glob))
581        {
582          /* This node was copied from a dropped source.
583             We have a problem, since we did not want to drop this node too.
584
585             However, there is one special case we'll handle.  If the node is
586             a file, and this was a copy-and-modify operation, then the
587             dumpfile should contain the new contents of the file.  In this
588             scenario, we'll just do an add without history using the new
589             contents.  */
590          kind = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_KIND);
591
592          /* If there is a Text-content-length header, and the kind is
593             "file", we just fallback to an add without history. */
594          if (tcl && (strcmp(kind, "file") == 0))
595            {
596              svn_hash_sets(headers, SVN_REPOS_DUMPFILE_NODE_COPYFROM_PATH,
597                            NULL);
598              svn_hash_sets(headers, SVN_REPOS_DUMPFILE_NODE_COPYFROM_REV,
599                            NULL);
600              copyfrom_path = NULL;
601            }
602          /* Else, this is either a directory or a file whose contents we
603             don't have readily available.  */
604          else
605            {
606              return svn_error_createf
607                (SVN_ERR_INCOMPLETE_DATA, 0,
608                 _("Invalid copy source path '%s'"), copyfrom_path);
609            }
610        }
611
612      nb->has_props = FALSE;
613      nb->has_text = FALSE;
614      nb->has_prop_delta = FALSE;
615      nb->has_text_delta = FALSE;
616      nb->writing_begun = FALSE;
617      nb->tcl = tcl ? svn__atoui64(tcl) : 0;
618      nb->header = svn_stringbuf_create_empty(pool);
619      nb->props = svn_stringbuf_create_empty(pool);
620      nb->node_path = apr_pstrdup(pool, node_path);
621
622      /* Now we know for sure that we have a node that will not be
623         skipped, flush the revision if it has not already been done. */
624      nb->rb->has_nodes = TRUE;
625      if (! nb->rb->writing_begun)
626        SVN_ERR(output_revision(nb->rb));
627
628      /* A node record is required to begin with 'Node-path', skip the
629         leading '/' to match the form used by 'svnadmin dump'. */
630      SVN_ERR(svn_stream_printf(nb->rb->pb->out_stream,
631                                pool, "%s: %s\n",
632                                SVN_REPOS_DUMPFILE_NODE_PATH, node_path + 1));
633
634      /* Node-kind is next and is optional. */
635      kind = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_KIND);
636      if (kind)
637        SVN_ERR(svn_stream_printf(nb->rb->pb->out_stream,
638                                  pool, "%s: %s\n",
639                                  SVN_REPOS_DUMPFILE_NODE_KIND, kind));
640
641      /* Node-action is next and required. */
642      action = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_ACTION);
643      if (action)
644        SVN_ERR(svn_stream_printf(nb->rb->pb->out_stream,
645                                  pool, "%s: %s\n",
646                                  SVN_REPOS_DUMPFILE_NODE_ACTION, action));
647      else
648        return svn_error_createf(SVN_ERR_INCOMPLETE_DATA, 0,
649                                 _("Missing Node-action for path '%s'"),
650                                 node_path);
651
652      for (hi = apr_hash_first(pool, headers); hi; hi = apr_hash_next(hi))
653        {
654          const char *key = svn__apr_hash_index_key(hi);
655          const char *val = svn__apr_hash_index_val(hi);
656
657          if ((!strcmp(key, SVN_REPOS_DUMPFILE_PROP_DELTA))
658              && (!strcmp(val, "true")))
659            nb->has_prop_delta = TRUE;
660
661          if ((!strcmp(key, SVN_REPOS_DUMPFILE_TEXT_DELTA))
662              && (!strcmp(val, "true")))
663            nb->has_text_delta = TRUE;
664
665          if ((!strcmp(key, SVN_REPOS_DUMPFILE_CONTENT_LENGTH))
666              || (!strcmp(key, SVN_REPOS_DUMPFILE_PROP_CONTENT_LENGTH))
667              || (!strcmp(key, SVN_REPOS_DUMPFILE_TEXT_CONTENT_LENGTH))
668              || (!strcmp(key, SVN_REPOS_DUMPFILE_NODE_PATH))
669              || (!strcmp(key, SVN_REPOS_DUMPFILE_NODE_KIND))
670              || (!strcmp(key, SVN_REPOS_DUMPFILE_NODE_ACTION)))
671            continue;
672
673          /* Rewrite Node-Copyfrom-Rev if we are renumbering revisions.
674             The number points to some revision in the past. We keep track
675             of revision renumbering in an apr_hash, which maps original
676             revisions to new ones. Dropped revision are mapped to -1.
677             This should never happen here.
678          */
679          if (pb->do_renumber_revs
680              && (!strcmp(key, SVN_REPOS_DUMPFILE_NODE_COPYFROM_REV)))
681            {
682              svn_revnum_t cf_orig_rev;
683              struct revmap_t *cf_renum_val;
684
685              cf_orig_rev = SVN_STR_TO_REV(val);
686              cf_renum_val = apr_hash_get(pb->renumber_history,
687                                          &cf_orig_rev,
688                                          sizeof(svn_revnum_t));
689              if (! (cf_renum_val && SVN_IS_VALID_REVNUM(cf_renum_val->rev)))
690                return svn_error_createf
691                  (SVN_ERR_NODE_UNEXPECTED_KIND, NULL,
692                   _("No valid copyfrom revision in filtered stream"));
693              SVN_ERR(svn_stream_printf
694                      (nb->rb->pb->out_stream, pool,
695                       SVN_REPOS_DUMPFILE_NODE_COPYFROM_REV ": %ld\n",
696                       cf_renum_val->rev));
697              continue;
698            }
699
700          /* passthru: put header straight to output */
701
702          SVN_ERR(svn_stream_printf(nb->rb->pb->out_stream,
703                                    pool, "%s: %s\n",
704                                    key, val));
705        }
706    }
707
708  return SVN_NO_ERROR;
709}
710
711
712/* Output node header and props to dumpstream
713   This will be called by set_fulltext() after setting nb->has_text to TRUE,
714   if the node has any text, or by close_node() otherwise. This must only
715   be called if nb->writing_begun is FALSE. */
716static svn_error_t *
717output_node(struct node_baton_t *nb)
718{
719  int bytes_used;
720  char buf[SVN_KEYLINE_MAXLEN];
721
722  nb->writing_begun = TRUE;
723
724  /* when there are no props nb->props->len would be zero and won't mess up
725     Content-Length. */
726  if (nb->has_props)
727    svn_stringbuf_appendcstr(nb->props, "PROPS-END\n");
728
729  /* 1. recalculate & check text-md5 if present. Passed through right now. */
730
731  /* 2. recalculate and add content-lengths */
732
733  if (nb->has_props)
734    {
735      svn_stringbuf_appendcstr(nb->header,
736                               SVN_REPOS_DUMPFILE_PROP_CONTENT_LENGTH);
737      bytes_used = apr_snprintf(buf, sizeof(buf), ": %" APR_SIZE_T_FMT,
738                                nb->props->len);
739      svn_stringbuf_appendbytes(nb->header, buf, bytes_used);
740      svn_stringbuf_appendbyte(nb->header, '\n');
741    }
742  if (nb->has_text)
743    {
744      svn_stringbuf_appendcstr(nb->header,
745                               SVN_REPOS_DUMPFILE_TEXT_CONTENT_LENGTH);
746      bytes_used = apr_snprintf(buf, sizeof(buf), ": %" SVN_FILESIZE_T_FMT,
747                                nb->tcl);
748      svn_stringbuf_appendbytes(nb->header, buf, bytes_used);
749      svn_stringbuf_appendbyte(nb->header, '\n');
750    }
751  svn_stringbuf_appendcstr(nb->header, SVN_REPOS_DUMPFILE_CONTENT_LENGTH);
752  bytes_used = apr_snprintf(buf, sizeof(buf), ": %" SVN_FILESIZE_T_FMT,
753                            (svn_filesize_t) (nb->props->len + nb->tcl));
754  svn_stringbuf_appendbytes(nb->header, buf, bytes_used);
755  svn_stringbuf_appendbyte(nb->header, '\n');
756
757  /* put an end to headers */
758  svn_stringbuf_appendbyte(nb->header, '\n');
759
760  /* 3. output all the stuff */
761
762  SVN_ERR(svn_stream_write(nb->rb->pb->out_stream,
763                           nb->header->data , &(nb->header->len)));
764  SVN_ERR(svn_stream_write(nb->rb->pb->out_stream,
765                           nb->props->data , &(nb->props->len)));
766
767  return SVN_NO_ERROR;
768}
769
770
771/* Examine the mergeinfo in INITIAL_VAL, omitting missing merge
772   sources or renumbering revisions in rangelists as appropriate, and
773   return the (possibly new) mergeinfo in *FINAL_VAL (allocated from
774   POOL). */
775static svn_error_t *
776adjust_mergeinfo(svn_string_t **final_val, const svn_string_t *initial_val,
777                 struct revision_baton_t *rb, apr_pool_t *pool)
778{
779  apr_hash_t *mergeinfo;
780  apr_hash_t *final_mergeinfo = apr_hash_make(pool);
781  apr_hash_index_t *hi;
782  apr_pool_t *subpool = svn_pool_create(pool);
783
784  SVN_ERR(svn_mergeinfo_parse(&mergeinfo, initial_val->data, subpool));
785
786  /* Issue #3020: If we are skipping missing merge sources, then also
787     filter mergeinfo ranges as old or older than the oldest revision in the
788     dump stream.  Those older than the oldest obviously refer to history
789     outside of the dump stream.  The oldest rev itself is present in the
790     dump, but cannot be a valid merge source revision since it is the
791     start of all history.  E.g. if we dump -r100:400 then dumpfilter the
792     result with --skip-missing-merge-sources, any mergeinfo with revision
793     100 implies a change of -r99:100, but r99 is part of the history we
794     want filtered.
795
796     If the oldest rev is r0 then there is nothing to filter. */
797  if (rb->pb->skip_missing_merge_sources && rb->pb->oldest_original_rev > 0)
798    SVN_ERR(svn_mergeinfo__filter_mergeinfo_by_ranges(
799      &mergeinfo, mergeinfo,
800      rb->pb->oldest_original_rev, 0,
801      FALSE, subpool, subpool));
802
803  for (hi = apr_hash_first(subpool, mergeinfo); hi; hi = apr_hash_next(hi))
804    {
805      const char *merge_source = svn__apr_hash_index_key(hi);
806      svn_rangelist_t *rangelist = svn__apr_hash_index_val(hi);
807      struct parse_baton_t *pb = rb->pb;
808
809      /* Determine whether the merge_source is a part of the prefix. */
810      if (skip_path(merge_source, pb->prefixes, pb->do_exclude, pb->glob))
811        {
812          if (pb->skip_missing_merge_sources)
813            continue;
814          else
815            return svn_error_createf(SVN_ERR_INCOMPLETE_DATA, 0,
816                                     _("Missing merge source path '%s'; try "
817                                       "with --skip-missing-merge-sources"),
818                                     merge_source);
819        }
820
821      /* Possibly renumber revisions in merge source's rangelist. */
822      if (pb->do_renumber_revs)
823        {
824          int i;
825
826          for (i = 0; i < rangelist->nelts; i++)
827            {
828              struct revmap_t *revmap_start;
829              struct revmap_t *revmap_end;
830              svn_merge_range_t *range = APR_ARRAY_IDX(rangelist, i,
831                                                       svn_merge_range_t *);
832
833              revmap_start = apr_hash_get(pb->renumber_history,
834                                          &range->start, sizeof(svn_revnum_t));
835              if (! (revmap_start && SVN_IS_VALID_REVNUM(revmap_start->rev)))
836                return svn_error_createf
837                  (SVN_ERR_NODE_UNEXPECTED_KIND, NULL,
838                   _("No valid revision range 'start' in filtered stream"));
839
840              revmap_end = apr_hash_get(pb->renumber_history,
841                                        &range->end, sizeof(svn_revnum_t));
842              if (! (revmap_end && SVN_IS_VALID_REVNUM(revmap_end->rev)))
843                return svn_error_createf
844                  (SVN_ERR_NODE_UNEXPECTED_KIND, NULL,
845                   _("No valid revision range 'end' in filtered stream"));
846
847              range->start = revmap_start->rev;
848              range->end = revmap_end->rev;
849            }
850        }
851      svn_hash_sets(final_mergeinfo, merge_source, rangelist);
852    }
853
854  SVN_ERR(svn_mergeinfo__canonicalize_ranges(final_mergeinfo, subpool));
855  SVN_ERR(svn_mergeinfo_to_string(final_val, final_mergeinfo, pool));
856  svn_pool_destroy(subpool);
857
858  return SVN_NO_ERROR;
859}
860
861
862static svn_error_t *
863set_revision_property(void *revision_baton,
864                      const char *name,
865                      const svn_string_t *value)
866{
867  struct revision_baton_t *rb = revision_baton;
868  apr_pool_t *hash_pool = apr_hash_pool_get(rb->props);
869
870  rb->has_props = TRUE;
871  svn_hash_sets(rb->props,
872                apr_pstrdup(hash_pool, name),
873                svn_string_dup(value, hash_pool));
874  return SVN_NO_ERROR;
875}
876
877
878static svn_error_t *
879set_node_property(void *node_baton,
880                  const char *name,
881                  const svn_string_t *value)
882{
883  struct node_baton_t *nb = node_baton;
884  struct revision_baton_t *rb = nb->rb;
885
886  if (nb->do_skip)
887    return SVN_NO_ERROR;
888
889  if (! (nb->has_props || nb->has_prop_delta))
890    return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
891                             _("Delta property block detected, but deltas "
892                               "are not enabled for node '%s' in original "
893                               "revision %ld"),
894                             nb->node_path, rb->rev_orig);
895
896  if (strcmp(name, SVN_PROP_MERGEINFO) == 0)
897    {
898      svn_string_t *filtered_mergeinfo;  /* Avoid compiler warning. */
899      apr_pool_t *pool = apr_hash_pool_get(rb->props);
900      SVN_ERR(adjust_mergeinfo(&filtered_mergeinfo, value, rb, pool));
901      value = filtered_mergeinfo;
902    }
903
904  nb->has_props = TRUE;
905  write_prop_to_stringbuf(nb->props, name, value);
906
907  return SVN_NO_ERROR;
908}
909
910
911static svn_error_t *
912delete_node_property(void *node_baton, const char *name)
913{
914  struct node_baton_t *nb = node_baton;
915  struct revision_baton_t *rb = nb->rb;
916
917  if (nb->do_skip)
918    return SVN_NO_ERROR;
919
920  if (!nb->has_prop_delta)
921    return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
922                             _("Delta property block detected, but deltas "
923                               "are not enabled for node '%s' in original "
924                               "revision %ld"),
925                             nb->node_path, rb->rev_orig);
926
927  nb->has_props = TRUE;
928  write_propdel_to_stringbuf(&(nb->props), name);
929
930  return SVN_NO_ERROR;
931}
932
933
934static svn_error_t *
935remove_node_props(void *node_baton)
936{
937  struct node_baton_t *nb = node_baton;
938
939  /* In this case, not actually indicating that the node *has* props,
940     rather that we know about all the props that it has, since it now
941     has none. */
942  nb->has_props = TRUE;
943
944  return SVN_NO_ERROR;
945}
946
947
948static svn_error_t *
949set_fulltext(svn_stream_t **stream, void *node_baton)
950{
951  struct node_baton_t *nb = node_baton;
952
953  if (!nb->do_skip)
954    {
955      nb->has_text = TRUE;
956      if (! nb->writing_begun)
957        SVN_ERR(output_node(nb));
958      *stream = nb->rb->pb->out_stream;
959    }
960
961  return SVN_NO_ERROR;
962}
963
964
965/* Finalize node */
966static svn_error_t *
967close_node(void *node_baton)
968{
969  struct node_baton_t *nb = node_baton;
970  apr_size_t len = 2;
971
972  /* Get out of here if we can. */
973  if (nb->do_skip)
974    return SVN_NO_ERROR;
975
976  /* If the node was not flushed already to output its text, do it now. */
977  if (! nb->writing_begun)
978    SVN_ERR(output_node(nb));
979
980  /* put an end to node. */
981  SVN_ERR(svn_stream_write(nb->rb->pb->out_stream, "\n\n", &len));
982
983  return SVN_NO_ERROR;
984}
985
986
987/* Finalize revision */
988static svn_error_t *
989close_revision(void *revision_baton)
990{
991  struct revision_baton_t *rb = revision_baton;
992
993  /* If no node has yet flushed the revision, do it now. */
994  if (! rb->writing_begun)
995    return output_revision(rb);
996  else
997    return SVN_NO_ERROR;
998}
999
1000
/* Filtering vtable: the set of parser callbacks that do_filter() hands
   to svn_repos_parse_dumpstream3().  The initializers are positional,
   so their order has to follow the member order of
   svn_repos_parse_fns3_t. */
svn_repos_parse_fns3_t filtering_vtable =
  {
    magic_header_record,
    uuid_record,
    new_revision_record,
    new_node_record,
    set_revision_property,
    set_node_property,
    delete_node_property,
    remove_node_props,
    set_fulltext,
    NULL,   /* no handler for this slot (presumably apply_textdelta --
               confirm against svn_repos.h) */
    close_node,
    close_revision
  };
1017
1018
1019
1020/** Subcommands. **/
1021
1022static svn_opt_subcommand_t
1023  subcommand_help,
1024  subcommand_exclude,
1025  subcommand_include;
1026
/* Option codes for the options that have no single-character form.
   They are numbered consecutively starting at
   SVN_OPT_FIRST_LONGOPT_ID; options_table maps each code to its
   command-line spelling. */
enum
  {
    svndumpfilter__drop_empty_revs = SVN_OPT_FIRST_LONGOPT_ID,
                                               /* --drop-empty-revs */
    svndumpfilter__drop_all_empty_revs,        /* --drop-all-empty-revs */
    svndumpfilter__renumber_revs,              /* --renumber-revs */
    svndumpfilter__preserve_revprops,          /* --preserve-revprops */
    svndumpfilter__skip_missing_merge_sources, /* --skip-missing-merge-sources */
    svndumpfilter__targets,                    /* --targets ARG */
    svndumpfilter__quiet,                      /* --quiet */
    svndumpfilter__glob,                       /* --pattern */
    svndumpfilter__version                     /* --version */
  };
1039
/* Option codes and descriptions.
 *
 * Each entry is { long name, option code, takes-argument flag,
 * description }.  Only "targets" takes an argument.  The run of
 * spaces after an embedded "\n" in a description is presumably there
 * to line up continuation lines in the generated help output.
 *
 * The entire list must be terminated with an entry of nulls.
 */
static const apr_getopt_option_t options_table[] =
  {
    {"help",          'h', 0,
     N_("show help on a subcommand")},

    /* Short form only: no long name. */
    {NULL,            '?', 0,
     N_("show help on a subcommand")},

    {"version",            svndumpfilter__version, 0,
     N_("show program version information") },
    {"quiet",              svndumpfilter__quiet, 0,
     N_("Do not display filtering statistics.") },
    {"pattern",            svndumpfilter__glob, 0,
     N_("Treat the path prefixes as file glob patterns.") },
    {"drop-empty-revs",    svndumpfilter__drop_empty_revs, 0,
     N_("Remove revisions emptied by filtering.")},
    {"drop-all-empty-revs",    svndumpfilter__drop_all_empty_revs, 0,
     N_("Remove all empty revisions found in dumpstream\n"
        "                             except revision 0.")},
    {"renumber-revs",      svndumpfilter__renumber_revs, 0,
     N_("Renumber revisions left after filtering.") },
    {"skip-missing-merge-sources",
     svndumpfilter__skip_missing_merge_sources, 0,
     N_("Skip missing merge sources.") },
    {"preserve-revprops",  svndumpfilter__preserve_revprops, 0,
     N_("Don't filter revision properties.") },
    {"targets", svndumpfilter__targets, 1,
     N_("Read additional prefixes, one per line, from\n"
        "                             file ARG.")},
    {NULL}
  };
1075
1076
/* Array of available subcommands.
 *
 * Each entry is { name, implementing function, aliases, help text,
 * accepted option codes }.  "exclude" and "include" accept the same
 * set of options; "help" lists none.  main() rejects any received
 * option (other than -h / -?) that the chosen subcommand does not
 * list here.
 *
 * The entire list must be terminated with an entry of nulls.
 */
static const svn_opt_subcommand_desc2_t cmd_table[] =
  {
    {"exclude", subcommand_exclude, {0},
     N_("Filter out nodes with given prefixes from dumpstream.\n"
        "usage: svndumpfilter exclude PATH_PREFIX...\n"),
     {svndumpfilter__drop_empty_revs, svndumpfilter__drop_all_empty_revs,
      svndumpfilter__renumber_revs,
      svndumpfilter__skip_missing_merge_sources, svndumpfilter__targets,
      svndumpfilter__preserve_revprops, svndumpfilter__quiet,
      svndumpfilter__glob} },

    {"include", subcommand_include, {0},
     N_("Filter out nodes without given prefixes from dumpstream.\n"
        "usage: svndumpfilter include PATH_PREFIX...\n"),
     {svndumpfilter__drop_empty_revs, svndumpfilter__drop_all_empty_revs,
      svndumpfilter__renumber_revs,
      svndumpfilter__skip_missing_merge_sources, svndumpfilter__targets,
      svndumpfilter__preserve_revprops, svndumpfilter__quiet,
      svndumpfilter__glob} },

    {"help", subcommand_help, {"?", "h"},
     N_("Describe the usage of this program or its subcommands.\n"
        "usage: svndumpfilter help [SUBCOMMAND...]\n"),
     {0} },

    { NULL, NULL, {0}, NULL, {0} }
  };
1107
1108
/* Baton for passing option/argument state to a subcommand function.
   main() zero-fills it, records each parsed option in it, and passes
   its address to the selected subcommand. */
struct svndumpfilter_opt_state
{
  svn_opt_revision_t start_revision;     /* -r X[:Y] is         */
  svn_opt_revision_t end_revision;       /* not implemented.    */
  svn_boolean_t quiet;                   /* --quiet             */
  svn_boolean_t glob;                    /* --pattern           */
  svn_boolean_t version;                 /* --version           */
  svn_boolean_t drop_empty_revs;         /* --drop-empty-revs   */
  svn_boolean_t drop_all_empty_revs;     /* --drop-all-empty-revs */
  svn_boolean_t help;                    /* --help, -h or -?    */
  svn_boolean_t renumber_revs;           /* --renumber-revs     */
  svn_boolean_t preserve_revprops;       /* --preserve-revprops */
  svn_boolean_t skip_missing_merge_sources;
                                         /* --skip-missing-merge-sources */
  const char *targets_file;              /* --targets ARG       */
  apr_array_header_t *prefixes;          /* mainargs, plus the lines of
                                            the --targets file. */
};
1127
1128
1129static svn_error_t *
1130parse_baton_initialize(struct parse_baton_t **pb,
1131                       struct svndumpfilter_opt_state *opt_state,
1132                       svn_boolean_t do_exclude,
1133                       apr_pool_t *pool)
1134{
1135  struct parse_baton_t *baton = apr_palloc(pool, sizeof(*baton));
1136
1137  /* Read the stream from STDIN.  Users can redirect a file. */
1138  SVN_ERR(create_stdio_stream(&(baton->in_stream),
1139                              apr_file_open_stdin, pool));
1140
1141  /* Have the parser dump results to STDOUT. Users can redirect a file. */
1142  SVN_ERR(create_stdio_stream(&(baton->out_stream),
1143                              apr_file_open_stdout, pool));
1144
1145  baton->do_exclude = do_exclude;
1146
1147  /* Ignore --renumber-revs if there can't possibly be
1148     anything to renumber. */
1149  baton->do_renumber_revs =
1150    (opt_state->renumber_revs && (opt_state->drop_empty_revs
1151                                  || opt_state->drop_all_empty_revs));
1152
1153  baton->drop_empty_revs = opt_state->drop_empty_revs;
1154  baton->drop_all_empty_revs = opt_state->drop_all_empty_revs;
1155  baton->preserve_revprops = opt_state->preserve_revprops;
1156  baton->quiet = opt_state->quiet;
1157  baton->glob = opt_state->glob;
1158  baton->prefixes = opt_state->prefixes;
1159  baton->skip_missing_merge_sources = opt_state->skip_missing_merge_sources;
1160  baton->rev_drop_count = 0; /* used to shift revnums while filtering */
1161  baton->dropped_nodes = apr_hash_make(pool);
1162  baton->renumber_history = apr_hash_make(pool);
1163  baton->last_live_revision = SVN_INVALID_REVNUM;
1164  baton->oldest_original_rev = SVN_INVALID_REVNUM;
1165  baton->allow_deltas = FALSE;
1166
1167  *pb = baton;
1168  return SVN_NO_ERROR;
1169}
1170
1171/* This implements `help` subcommand. */
1172static svn_error_t *
1173subcommand_help(apr_getopt_t *os, void *baton, apr_pool_t *pool)
1174{
1175  struct svndumpfilter_opt_state *opt_state = baton;
1176  const char *header =
1177    _("general usage: svndumpfilter SUBCOMMAND [ARGS & OPTIONS ...]\n"
1178      "Type 'svndumpfilter help <subcommand>' for help on a "
1179      "specific subcommand.\n"
1180      "Type 'svndumpfilter --version' to see the program version.\n"
1181      "\n"
1182      "Available subcommands:\n");
1183
1184  SVN_ERR(svn_opt_print_help4(os, "svndumpfilter",
1185                              opt_state ? opt_state->version : FALSE,
1186                              opt_state ? opt_state->quiet : FALSE,
1187                              /*###opt_state ? opt_state->verbose :*/ FALSE,
1188                              NULL, header, cmd_table, options_table,
1189                              NULL, NULL, pool));
1190
1191  return SVN_NO_ERROR;
1192}
1193
1194
1195/* Version compatibility check */
1196static svn_error_t *
1197check_lib_versions(void)
1198{
1199  static const svn_version_checklist_t checklist[] =
1200    {
1201      { "svn_subr",  svn_subr_version },
1202      { "svn_repos", svn_repos_version },
1203      { "svn_delta", svn_delta_version },
1204      { NULL, NULL }
1205    };
1206  SVN_VERSION_DEFINE(my_version);
1207
1208  return svn_ver_check_list2(&my_version, checklist, svn_ver_equal);
1209}
1210
1211
1212/* Do the real work of filtering. */
1213static svn_error_t *
1214do_filter(apr_getopt_t *os,
1215          void *baton,
1216          svn_boolean_t do_exclude,
1217          apr_pool_t *pool)
1218{
1219  struct svndumpfilter_opt_state *opt_state = baton;
1220  struct parse_baton_t *pb;
1221  apr_hash_index_t *hi;
1222  apr_array_header_t *keys;
1223  int i, num_keys;
1224
1225  if (! opt_state->quiet)
1226    {
1227      apr_pool_t *subpool = svn_pool_create(pool);
1228
1229      if (opt_state->glob)
1230        {
1231          SVN_ERR(svn_cmdline_fprintf(stderr, subpool,
1232                                      do_exclude
1233                                      ? (opt_state->drop_empty_revs
1234                                         || opt_state->drop_all_empty_revs)
1235                                        ? _("Excluding (and dropping empty "
1236                                            "revisions for) prefix patterns:\n")
1237                                        : _("Excluding prefix patterns:\n")
1238                                      : (opt_state->drop_empty_revs
1239                                         || opt_state->drop_all_empty_revs)
1240                                        ? _("Including (and dropping empty "
1241                                            "revisions for) prefix patterns:\n")
1242                                        : _("Including prefix patterns:\n")));
1243        }
1244      else
1245        {
1246          SVN_ERR(svn_cmdline_fprintf(stderr, subpool,
1247                                      do_exclude
1248                                      ? (opt_state->drop_empty_revs
1249                                         || opt_state->drop_all_empty_revs)
1250                                        ? _("Excluding (and dropping empty "
1251                                            "revisions for) prefixes:\n")
1252                                        : _("Excluding prefixes:\n")
1253                                      : (opt_state->drop_empty_revs
1254                                         || opt_state->drop_all_empty_revs)
1255                                        ? _("Including (and dropping empty "
1256                                            "revisions for) prefixes:\n")
1257                                        : _("Including prefixes:\n")));
1258        }
1259
1260      for (i = 0; i < opt_state->prefixes->nelts; i++)
1261        {
1262          svn_pool_clear(subpool);
1263          SVN_ERR(svn_cmdline_fprintf
1264                  (stderr, subpool, "   '%s'\n",
1265                   APR_ARRAY_IDX(opt_state->prefixes, i, const char *)));
1266        }
1267
1268      SVN_ERR(svn_cmdline_fputs("\n", stderr, subpool));
1269      svn_pool_destroy(subpool);
1270    }
1271
1272  SVN_ERR(parse_baton_initialize(&pb, opt_state, do_exclude, pool));
1273  SVN_ERR(svn_repos_parse_dumpstream3(pb->in_stream, &filtering_vtable, pb,
1274                                      TRUE, NULL, NULL, pool));
1275
1276  /* The rest of this is just reporting.  If we aren't reporting, get
1277     outta here. */
1278  if (opt_state->quiet)
1279    return SVN_NO_ERROR;
1280
1281  SVN_ERR(svn_cmdline_fputs("\n", stderr, pool));
1282
1283  if (pb->rev_drop_count)
1284    SVN_ERR(svn_cmdline_fprintf(stderr, pool,
1285                                Q_("Dropped %d revision.\n\n",
1286                                   "Dropped %d revisions.\n\n",
1287                                   pb->rev_drop_count),
1288                                pb->rev_drop_count));
1289
1290  if (pb->do_renumber_revs)
1291    {
1292      apr_pool_t *subpool = svn_pool_create(pool);
1293      SVN_ERR(svn_cmdline_fputs(_("Revisions renumbered as follows:\n"),
1294                                stderr, subpool));
1295
1296      /* Get the keys of the hash, sort them, then print the hash keys
1297         and values, sorted by keys. */
1298      num_keys = apr_hash_count(pb->renumber_history);
1299      keys = apr_array_make(pool, num_keys + 1, sizeof(svn_revnum_t));
1300      for (hi = apr_hash_first(pool, pb->renumber_history);
1301           hi;
1302           hi = apr_hash_next(hi))
1303        {
1304          const svn_revnum_t *revnum = svn__apr_hash_index_key(hi);
1305
1306          APR_ARRAY_PUSH(keys, svn_revnum_t) = *revnum;
1307        }
1308      qsort(keys->elts, keys->nelts,
1309            keys->elt_size, svn_sort_compare_revisions);
1310      for (i = 0; i < keys->nelts; i++)
1311        {
1312          svn_revnum_t this_key;
1313          struct revmap_t *this_val;
1314
1315          svn_pool_clear(subpool);
1316          this_key = APR_ARRAY_IDX(keys, i, svn_revnum_t);
1317          this_val = apr_hash_get(pb->renumber_history, &this_key,
1318                                  sizeof(this_key));
1319          if (this_val->was_dropped)
1320            SVN_ERR(svn_cmdline_fprintf(stderr, subpool,
1321                                        _("   %ld => (dropped)\n"),
1322                                        this_key));
1323          else
1324            SVN_ERR(svn_cmdline_fprintf(stderr, subpool,
1325                                        "   %ld => %ld\n",
1326                                        this_key, this_val->rev));
1327        }
1328      SVN_ERR(svn_cmdline_fputs("\n", stderr, subpool));
1329      svn_pool_destroy(subpool);
1330    }
1331
1332  if ((num_keys = apr_hash_count(pb->dropped_nodes)))
1333    {
1334      apr_pool_t *subpool = svn_pool_create(pool);
1335      SVN_ERR(svn_cmdline_fprintf(stderr, subpool,
1336                                  Q_("Dropped %d node:\n",
1337                                     "Dropped %d nodes:\n",
1338                                     num_keys),
1339                                  num_keys));
1340
1341      /* Get the keys of the hash, sort them, then print the hash keys
1342         and values, sorted by keys. */
1343      keys = apr_array_make(pool, num_keys + 1, sizeof(const char *));
1344      for (hi = apr_hash_first(pool, pb->dropped_nodes);
1345           hi;
1346           hi = apr_hash_next(hi))
1347        {
1348          const char *path = svn__apr_hash_index_key(hi);
1349
1350          APR_ARRAY_PUSH(keys, const char *) = path;
1351        }
1352      qsort(keys->elts, keys->nelts, keys->elt_size, svn_sort_compare_paths);
1353      for (i = 0; i < keys->nelts; i++)
1354        {
1355          svn_pool_clear(subpool);
1356          SVN_ERR(svn_cmdline_fprintf
1357                  (stderr, subpool, "   '%s'\n",
1358                   (const char *)APR_ARRAY_IDX(keys, i, const char *)));
1359        }
1360      SVN_ERR(svn_cmdline_fputs("\n", stderr, subpool));
1361      svn_pool_destroy(subpool);
1362    }
1363
1364  return SVN_NO_ERROR;
1365}
1366
1367/* This implements `exclude' subcommand. */
1368static svn_error_t *
1369subcommand_exclude(apr_getopt_t *os, void *baton, apr_pool_t *pool)
1370{
1371  return do_filter(os, baton, TRUE, pool);
1372}
1373
1374
1375/* This implements `include` subcommand. */
1376static svn_error_t *
1377subcommand_include(apr_getopt_t *os, void *baton, apr_pool_t *pool)
1378{
1379  return do_filter(os, baton, FALSE, pool);
1380}
1381
1382
1383
1384/** Main. **/
1385
1386int
1387main(int argc, const char *argv[])
1388{
1389  svn_error_t *err;
1390  apr_status_t apr_err;
1391  apr_pool_t *pool;
1392
1393  const svn_opt_subcommand_desc2_t *subcommand = NULL;
1394  struct svndumpfilter_opt_state opt_state;
1395  apr_getopt_t *os;
1396  int opt_id;
1397  apr_array_header_t *received_opts;
1398  int i;
1399
1400
1401  /* Initialize the app. */
1402  if (svn_cmdline_init("svndumpfilter", stderr) != EXIT_SUCCESS)
1403    return EXIT_FAILURE;
1404
1405  /* Create our top-level pool.  Use a separate mutexless allocator,
1406   * given this application is single threaded.
1407   */
1408  pool = apr_allocator_owner_get(svn_pool_create_allocator(FALSE));
1409
1410  /* Check library versions */
1411  err = check_lib_versions();
1412  if (err)
1413    return svn_cmdline_handle_exit_error(err, pool, "svndumpfilter: ");
1414
1415  received_opts = apr_array_make(pool, SVN_OPT_MAX_OPTIONS, sizeof(int));
1416
1417  /* Initialize the FS library. */
1418  err = svn_fs_initialize(pool);
1419  if (err)
1420    return svn_cmdline_handle_exit_error(err, pool, "svndumpfilter: ");
1421
1422  if (argc <= 1)
1423    {
1424      SVN_INT_ERR(subcommand_help(NULL, NULL, pool));
1425      svn_pool_destroy(pool);
1426      return EXIT_FAILURE;
1427    }
1428
1429  /* Initialize opt_state. */
1430  memset(&opt_state, 0, sizeof(opt_state));
1431  opt_state.start_revision.kind = svn_opt_revision_unspecified;
1432  opt_state.end_revision.kind = svn_opt_revision_unspecified;
1433
1434  /* Parse options. */
1435  err = svn_cmdline__getopt_init(&os, argc, argv, pool);
1436  if (err)
1437    return svn_cmdline_handle_exit_error(err, pool, "svndumpfilter: ");
1438
1439  os->interleave = 1;
1440  while (1)
1441    {
1442      const char *opt_arg;
1443
1444      /* Parse the next option. */
1445      apr_err = apr_getopt_long(os, options_table, &opt_id, &opt_arg);
1446      if (APR_STATUS_IS_EOF(apr_err))
1447        break;
1448      else if (apr_err)
1449        {
1450          SVN_INT_ERR(subcommand_help(NULL, NULL, pool));
1451          svn_pool_destroy(pool);
1452          return EXIT_FAILURE;
1453        }
1454
1455      /* Stash the option code in an array before parsing it. */
1456      APR_ARRAY_PUSH(received_opts, int) = opt_id;
1457
1458      switch (opt_id)
1459        {
1460        case 'h':
1461        case '?':
1462          opt_state.help = TRUE;
1463          break;
1464        case svndumpfilter__version:
1465          opt_state.version = TRUE;
1466          break;
1467        case svndumpfilter__quiet:
1468          opt_state.quiet = TRUE;
1469          break;
1470        case svndumpfilter__glob:
1471          opt_state.glob = TRUE;
1472          break;
1473        case svndumpfilter__drop_empty_revs:
1474          opt_state.drop_empty_revs = TRUE;
1475          break;
1476        case svndumpfilter__drop_all_empty_revs:
1477          opt_state.drop_all_empty_revs = TRUE;
1478          break;
1479        case svndumpfilter__renumber_revs:
1480          opt_state.renumber_revs = TRUE;
1481          break;
1482        case svndumpfilter__preserve_revprops:
1483          opt_state.preserve_revprops = TRUE;
1484          break;
1485        case svndumpfilter__skip_missing_merge_sources:
1486          opt_state.skip_missing_merge_sources = TRUE;
1487          break;
1488        case svndumpfilter__targets:
1489          opt_state.targets_file = opt_arg;
1490          break;
1491        default:
1492          {
1493            SVN_INT_ERR(subcommand_help(NULL, NULL, pool));
1494            svn_pool_destroy(pool);
1495            return EXIT_FAILURE;
1496          }
1497        }  /* close `switch' */
1498    }  /* close `while' */
1499
1500  /* Disallow simultaneous use of both --drop-empty-revs and
1501     --drop-all-empty-revs. */
1502  if (opt_state.drop_empty_revs && opt_state.drop_all_empty_revs)
1503    {
1504      err = svn_error_create(SVN_ERR_CL_MUTUALLY_EXCLUSIVE_ARGS, NULL,
1505                             _("--drop-empty-revs cannot be used with "
1506                               "--drop-all-empty-revs"));
1507      return svn_cmdline_handle_exit_error(err, pool, "svndumpfilter: ");
1508    }
1509
1510  /* If the user asked for help, then the rest of the arguments are
1511     the names of subcommands to get help on (if any), or else they're
1512     just typos/mistakes.  Whatever the case, the subcommand to
1513     actually run is subcommand_help(). */
1514  if (opt_state.help)
1515    subcommand = svn_opt_get_canonical_subcommand2(cmd_table, "help");
1516
1517  /* If we're not running the `help' subcommand, then look for a
1518     subcommand in the first argument. */
1519  if (subcommand == NULL)
1520    {
1521      if (os->ind >= os->argc)
1522        {
1523          if (opt_state.version)
1524            {
1525              /* Use the "help" subcommand to handle the "--version" option. */
1526              static const svn_opt_subcommand_desc2_t pseudo_cmd =
1527                { "--version", subcommand_help, {0}, "",
1528                  {svndumpfilter__version,  /* must accept its own option */
1529                   svndumpfilter__quiet,
1530                  } };
1531
1532              subcommand = &pseudo_cmd;
1533            }
1534          else
1535            {
1536              svn_error_clear(svn_cmdline_fprintf
1537                              (stderr, pool,
1538                               _("Subcommand argument required\n")));
1539              SVN_INT_ERR(subcommand_help(NULL, NULL, pool));
1540              svn_pool_destroy(pool);
1541              return EXIT_FAILURE;
1542            }
1543        }
1544      else
1545        {
1546          const char *first_arg = os->argv[os->ind++];
1547          subcommand = svn_opt_get_canonical_subcommand2(cmd_table, first_arg);
1548          if (subcommand == NULL)
1549            {
1550              const char* first_arg_utf8;
1551              if ((err = svn_utf_cstring_to_utf8(&first_arg_utf8, first_arg,
1552                                                 pool)))
1553                return svn_cmdline_handle_exit_error(err, pool,
1554                                                     "svndumpfilter: ");
1555
1556              svn_error_clear(
1557                svn_cmdline_fprintf(stderr, pool,
1558                                    _("Unknown subcommand: '%s'\n"),
1559                                    first_arg_utf8));
1560              SVN_INT_ERR(subcommand_help(NULL, NULL, pool));
1561              svn_pool_destroy(pool);
1562              return EXIT_FAILURE;
1563            }
1564        }
1565    }
1566
1567  /* If there's a second argument, it's probably [one of] prefixes.
1568     Every subcommand except `help' requires at least one, so we parse
1569     them out here and store in opt_state. */
1570
1571  if (subcommand->cmd_func != subcommand_help)
1572    {
1573
1574      opt_state.prefixes = apr_array_make(pool, os->argc - os->ind,
1575                                          sizeof(const char *));
1576      for (i = os->ind ; i< os->argc; i++)
1577        {
1578          const char *prefix;
1579
1580          /* Ensure that each prefix is UTF8-encoded, in internal
1581             style, and absolute. */
1582          SVN_INT_ERR(svn_utf_cstring_to_utf8(&prefix, os->argv[i], pool));
1583          prefix = svn_relpath__internal_style(prefix, pool);
1584          if (prefix[0] != '/')
1585            prefix = apr_pstrcat(pool, "/", prefix, (char *)NULL);
1586          APR_ARRAY_PUSH(opt_state.prefixes, const char *) = prefix;
1587        }
1588
1589      if (opt_state.targets_file)
1590        {
1591          svn_stringbuf_t *buffer, *buffer_utf8;
1592          const char *utf8_targets_file;
1593          apr_array_header_t *targets = apr_array_make(pool, 0,
1594                                                       sizeof(const char *));
1595
1596          /* We need to convert to UTF-8 now, even before we divide
1597             the targets into an array, because otherwise we wouldn't
1598             know what delimiter to use for svn_cstring_split().  */
1599
1600          SVN_INT_ERR(svn_utf_cstring_to_utf8(&utf8_targets_file,
1601                                              opt_state.targets_file, pool));
1602
1603          SVN_INT_ERR(svn_stringbuf_from_file2(&buffer, utf8_targets_file,
1604                                               pool));
1605          SVN_INT_ERR(svn_utf_stringbuf_to_utf8(&buffer_utf8, buffer, pool));
1606
1607          targets = apr_array_append(pool,
1608                         svn_cstring_split(buffer_utf8->data, "\n\r",
1609                                           TRUE, pool),
1610                         targets);
1611
1612          for (i = 0; i < targets->nelts; i++)
1613            {
1614              const char *prefix = APR_ARRAY_IDX(targets, i, const char *);
1615              if (prefix[0] != '/')
1616                prefix = apr_pstrcat(pool, "/", prefix, (char *)NULL);
1617              APR_ARRAY_PUSH(opt_state.prefixes, const char *) = prefix;
1618            }
1619        }
1620
1621      if (apr_is_empty_array(opt_state.prefixes))
1622        {
1623          svn_error_clear(svn_cmdline_fprintf
1624                          (stderr, pool,
1625                           _("\nError: no prefixes supplied.\n")));
1626          svn_pool_destroy(pool);
1627          return EXIT_FAILURE;
1628        }
1629    }
1630
1631
1632  /* Check that the subcommand wasn't passed any inappropriate options. */
1633  for (i = 0; i < received_opts->nelts; i++)
1634    {
1635      opt_id = APR_ARRAY_IDX(received_opts, i, int);
1636
1637      /* All commands implicitly accept --help, so just skip over this
1638         when we see it. Note that we don't want to include this option
1639         in their "accepted options" list because it would be awfully
1640         redundant to display it in every commands' help text. */
1641      if (opt_id == 'h' || opt_id == '?')
1642        continue;
1643
1644      if (! svn_opt_subcommand_takes_option3(subcommand, opt_id, NULL))
1645        {
1646          const char *optstr;
1647          const apr_getopt_option_t *badopt =
1648            svn_opt_get_option_from_code2(opt_id, options_table, subcommand,
1649                                          pool);
1650          svn_opt_format_option(&optstr, badopt, FALSE, pool);
1651          if (subcommand->name[0] == '-')
1652            SVN_INT_ERR(subcommand_help(NULL, NULL, pool));
1653          else
1654            svn_error_clear(svn_cmdline_fprintf
1655                            (stderr, pool,
1656                             _("Subcommand '%s' doesn't accept option '%s'\n"
1657                               "Type 'svndumpfilter help %s' for usage.\n"),
1658                             subcommand->name, optstr, subcommand->name));
1659          svn_pool_destroy(pool);
1660          return EXIT_FAILURE;
1661        }
1662    }
1663
1664  /* Run the subcommand. */
1665  err = (*subcommand->cmd_func)(os, &opt_state, pool);
1666  if (err)
1667    {
1668      /* For argument-related problems, suggest using the 'help'
1669         subcommand. */
1670      if (err->apr_err == SVN_ERR_CL_INSUFFICIENT_ARGS
1671          || err->apr_err == SVN_ERR_CL_ARG_PARSING_ERROR)
1672        {
1673          err = svn_error_quick_wrap(err,
1674                                     _("Try 'svndumpfilter help' for more "
1675                                       "info"));
1676        }
1677      return svn_cmdline_handle_exit_error(err, pool, "svndumpfilter: ");
1678    }
1679  else
1680    {
1681      svn_pool_destroy(pool);
1682
1683      /* Flush stdout, making sure the user will see any print errors. */
1684      SVN_INT_ERR(svn_cmdline_fflush(stdout));
1685      return EXIT_SUCCESS;
1686    }
1687}
1688