load.c revision 299742
1/* load.c --- parsing a 'dumpfile'-formatted stream.
2 *
3 * ====================================================================
4 *    Licensed to the Apache Software Foundation (ASF) under one
5 *    or more contributor license agreements.  See the NOTICE file
6 *    distributed with this work for additional information
7 *    regarding copyright ownership.  The ASF licenses this file
8 *    to you under the Apache License, Version 2.0 (the
9 *    "License"); you may not use this file except in compliance
10 *    with the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *    Unless required by applicable law or agreed to in writing,
15 *    software distributed under the License is distributed on an
16 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 *    KIND, either express or implied.  See the License for the
18 *    specific language governing permissions and limitations
19 *    under the License.
20 * ====================================================================
21 */
22
23
24#include <apr.h>
25
26#include "svn_hash.h"
27#include "svn_pools.h"
28#include "svn_error.h"
29#include "svn_repos.h"
30#include "svn_string.h"
31#include "repos.h"
32#include "svn_private_config.h"
33#include "svn_ctype.h"
34
35#include "private/svn_dep_compat.h"
36
37/*----------------------------------------------------------------------*/
38
39/** The parser and related helper funcs **/
40
41
42static svn_error_t *
43stream_ran_dry(void)
44{
45  return svn_error_create(SVN_ERR_INCOMPLETE_DATA, NULL,
46                          _("Premature end of content data in dumpstream"));
47}
48
49static svn_error_t *
50stream_malformed(void)
51{
52  return svn_error_create(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
53                          _("Dumpstream data appears to be malformed"));
54}
55
56/* Allocate a new hash *HEADERS in POOL, and read a series of
57   RFC822-style headers from STREAM.  Duplicate each header's name and
58   value into POOL and store in hash as a const char * ==> const char *.
59
60   The headers are assumed to be terminated by a single blank line,
61   which will be permanently sucked from the stream and tossed.
62
63   If the caller has already read in the first header line, it should
64   be passed in as FIRST_HEADER.  If not, pass NULL instead.
65 */
66static svn_error_t *
67read_header_block(svn_stream_t *stream,
68                  svn_stringbuf_t *first_header,
69                  apr_hash_t **headers,
70                  apr_pool_t *pool)
71{
72  *headers = apr_hash_make(pool);
73
74  while (1)
75    {
76      svn_stringbuf_t *header_str;
77      const char *name, *value;
78      svn_boolean_t eof;
79      apr_size_t i = 0;
80
81      if (first_header != NULL)
82        {
83          header_str = first_header;
84          first_header = NULL;  /* so we never visit this block again. */
85          eof = FALSE;
86        }
87
88      else
89        /* Read the next line into a stringbuf. */
90        SVN_ERR(svn_stream_readline(stream, &header_str, "\n", &eof, pool));
91
92      if (svn_stringbuf_isempty(header_str))
93        break;    /* end of header block */
94      else if (eof)
95        return stream_ran_dry();
96
97      /* Find the next colon in the stringbuf. */
98      while (header_str->data[i] != ':')
99        {
100          if (header_str->data[i] == '\0')
101            return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
102                                     _("Dump stream contains a malformed "
103                                       "header (with no ':') at '%.20s'"),
104                                     header_str->data);
105          i++;
106        }
107      /* Create a 'name' string and point to it. */
108      header_str->data[i] = '\0';
109      name = header_str->data;
110
111      /* Skip over the NULL byte and the space following it.  */
112      i += 2;
113      if (i > header_str->len)
114        return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
115                                 _("Dump stream contains a malformed "
116                                   "header (with no value) at '%.20s'"),
117                                 header_str->data);
118
119      /* Point to the 'value' string. */
120      value = header_str->data + i;
121
122      /* Store name/value in hash. */
123      svn_hash_sets(*headers, name, value);
124    }
125
126  return SVN_NO_ERROR;
127}
128
129
130/* Set *PBUF to a string of length LEN, allocated in POOL, read from STREAM.
131   Also read a newline from STREAM and increase *ACTUAL_LEN by the total
132   number of bytes read from STREAM.  */
133static svn_error_t *
134read_key_or_val(char **pbuf,
135                svn_filesize_t *actual_length,
136                svn_stream_t *stream,
137                apr_size_t len,
138                apr_pool_t *pool)
139{
140  char *buf = apr_pcalloc(pool, len + 1);
141  apr_size_t numread;
142  char c;
143
144  numread = len;
145  SVN_ERR(svn_stream_read_full(stream, buf, &numread));
146  *actual_length += numread;
147  if (numread != len)
148    return svn_error_trace(stream_ran_dry());
149  buf[len] = '\0';
150
151  /* Suck up extra newline after key data */
152  numread = 1;
153  SVN_ERR(svn_stream_read_full(stream, &c, &numread));
154  *actual_length += numread;
155  if (numread != 1)
156    return svn_error_trace(stream_ran_dry());
157  if (c != '\n')
158    return svn_error_trace(stream_malformed());
159
160  *pbuf = buf;
161  return SVN_NO_ERROR;
162}
163
164
165/* Read CONTENT_LENGTH bytes from STREAM, parsing the bytes as an
166   encoded Subversion properties hash, and making multiple calls to
167   PARSE_FNS->set_*_property on RECORD_BATON (depending on the value
168   of IS_NODE.)
169
170   Set *ACTUAL_LENGTH to the number of bytes consumed from STREAM.
171   If an error is returned, the value of *ACTUAL_LENGTH is undefined.
172
173   Use POOL for all allocations.  */
174static svn_error_t *
175parse_property_block(svn_stream_t *stream,
176                     svn_filesize_t content_length,
177                     const svn_repos_parse_fns3_t *parse_fns,
178                     void *record_baton,
179                     void *parse_baton,
180                     svn_boolean_t is_node,
181                     svn_filesize_t *actual_length,
182                     apr_pool_t *pool)
183{
184  svn_stringbuf_t *strbuf;
185  apr_pool_t *proppool = svn_pool_create(pool);
186
187  *actual_length = 0;
188  while (content_length != *actual_length)
189    {
190      char *buf;  /* a pointer into the stringbuf's data */
191      svn_boolean_t eof;
192
193      svn_pool_clear(proppool);
194
195      /* Read a key length line.  (Actually, it might be PROPS_END). */
196      SVN_ERR(svn_stream_readline(stream, &strbuf, "\n", &eof, proppool));
197
198      if (eof)
199        {
200          /* We could just use stream_ran_dry() or stream_malformed(),
201             but better to give a non-generic property block error. */
202          return svn_error_create
203            (SVN_ERR_STREAM_MALFORMED_DATA, NULL,
204             _("Incomplete or unterminated property block"));
205        }
206
207      *actual_length += (strbuf->len + 1); /* +1 because we read a \n too. */
208      buf = strbuf->data;
209
210      if (! strcmp(buf, "PROPS-END"))
211        break; /* no more properties. */
212
213      else if ((buf[0] == 'K') && (buf[1] == ' '))
214        {
215          char *keybuf;
216          apr_uint64_t len;
217
218          SVN_ERR(svn_cstring_strtoui64(&len, buf + 2, 0, APR_SIZE_MAX, 10));
219          SVN_ERR(read_key_or_val(&keybuf, actual_length,
220                                  stream, (apr_size_t)len, proppool));
221
222          /* Read a val length line */
223          SVN_ERR(svn_stream_readline(stream, &strbuf, "\n", &eof, proppool));
224          if (eof)
225            return stream_ran_dry();
226
227          *actual_length += (strbuf->len + 1); /* +1 because we read \n too */
228          buf = strbuf->data;
229
230          if ((buf[0] == 'V') && (buf[1] == ' '))
231            {
232              svn_string_t propstring;
233              char *valbuf;
234              apr_int64_t val;
235
236              SVN_ERR(svn_cstring_atoi64(&val, buf + 2));
237              propstring.len = (apr_size_t)val;
238              SVN_ERR(read_key_or_val(&valbuf, actual_length,
239                                      stream, propstring.len, proppool));
240              propstring.data = valbuf;
241
242              /* Now, send the property pair to the vtable! */
243              if (is_node)
244                {
245                  SVN_ERR(parse_fns->set_node_property(record_baton,
246                                                       keybuf,
247                                                       &propstring));
248                }
249              else
250                {
251                  SVN_ERR(parse_fns->set_revision_property(record_baton,
252                                                           keybuf,
253                                                           &propstring));
254                }
255            }
256          else
257            return stream_malformed(); /* didn't find expected 'V' line */
258        }
259      else if ((buf[0] == 'D') && (buf[1] == ' '))
260        {
261          char *keybuf;
262          apr_uint64_t len;
263
264          SVN_ERR(svn_cstring_strtoui64(&len, buf + 2, 0, APR_SIZE_MAX, 10));
265          SVN_ERR(read_key_or_val(&keybuf, actual_length,
266                                  stream, (apr_size_t)len, proppool));
267
268          /* We don't expect these in revision properties, and if we see
269             one when we don't have a delete_node_property callback,
270             then we're seeing a v3 feature in a v2 dump. */
271          if (!is_node || !parse_fns->delete_node_property)
272            return stream_malformed();
273
274          SVN_ERR(parse_fns->delete_node_property(record_baton, keybuf));
275        }
276      else
277        return stream_malformed(); /* didn't find expected 'K' line */
278
279    } /* while (1) */
280
281  svn_pool_destroy(proppool);
282  return SVN_NO_ERROR;
283}
284
285
286/* Read CONTENT_LENGTH bytes from STREAM. If IS_DELTA is true, use
287   PARSE_FNS->apply_textdelta to push a text delta, otherwise use
288   PARSE_FNS->set_fulltext to push those bytes as replace fulltext for
289   a node.  Use BUFFER/BUFLEN to push the fulltext in "chunks".
290
291   Use POOL for all allocations.  */
292static svn_error_t *
293parse_text_block(svn_stream_t *stream,
294                 svn_filesize_t content_length,
295                 svn_boolean_t is_delta,
296                 const svn_repos_parse_fns3_t *parse_fns,
297                 void *record_baton,
298                 char *buffer,
299                 apr_size_t buflen,
300                 apr_pool_t *pool)
301{
302  svn_stream_t *text_stream = NULL;
303  apr_size_t num_to_read, rlen, wlen;
304
305  if (is_delta)
306    {
307      svn_txdelta_window_handler_t wh;
308      void *whb;
309
310      SVN_ERR(parse_fns->apply_textdelta(&wh, &whb, record_baton));
311      if (wh)
312        text_stream = svn_txdelta_parse_svndiff(wh, whb, TRUE, pool);
313    }
314  else
315    {
316      /* Get a stream to which we can push the data. */
317      SVN_ERR(parse_fns->set_fulltext(&text_stream, record_baton));
318    }
319
320  /* Regardless of whether or not we have a sink for our data, we
321     need to read it. */
322  while (content_length)
323    {
324      if (content_length >= (svn_filesize_t)buflen)
325        rlen = buflen;
326      else
327        rlen = (apr_size_t) content_length;
328
329      num_to_read = rlen;
330      SVN_ERR(svn_stream_read_full(stream, buffer, &rlen));
331      content_length -= rlen;
332      if (rlen != num_to_read)
333        return stream_ran_dry();
334
335      if (text_stream)
336        {
337          /* write however many bytes you read. */
338          wlen = rlen;
339          SVN_ERR(svn_stream_write(text_stream, buffer, &wlen));
340          if (wlen != rlen)
341            {
342              /* Uh oh, didn't write as many bytes as we read. */
343              return svn_error_create(SVN_ERR_STREAM_UNEXPECTED_EOF, NULL,
344                                      _("Unexpected EOF writing contents"));
345            }
346        }
347    }
348
349  /* If we opened a stream, we must close it. */
350  if (text_stream)
351    SVN_ERR(svn_stream_close(text_stream));
352
353  return SVN_NO_ERROR;
354}
355
356
357
358/* Parse VERSIONSTRING and verify that we support the dumpfile format
359   version number, setting *VERSION appropriately. */
360static svn_error_t *
361parse_format_version(int *version,
362                     const char *versionstring)
363{
364  static const int magic_len = sizeof(SVN_REPOS_DUMPFILE_MAGIC_HEADER) - 1;
365  const char *p = strchr(versionstring, ':');
366  int value;
367
368  if (p == NULL
369      || p != (versionstring + magic_len)
370      || strncmp(versionstring,
371                 SVN_REPOS_DUMPFILE_MAGIC_HEADER,
372                 magic_len))
373    return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
374                             _("Malformed dumpfile header '%s'"),
375                             versionstring);
376
377  SVN_ERR(svn_cstring_atoi(&value, p + 1));
378
379  if (value > SVN_REPOS_DUMPFILE_FORMAT_VERSION)
380    return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
381                             _("Unsupported dumpfile version: %d"),
382                             value);
383
384  *version = value;
385  return SVN_NO_ERROR;
386}
387
388
389
390/*----------------------------------------------------------------------*/
391
392/** The public routines **/
393
394svn_error_t *
395svn_repos_parse_dumpstream3(svn_stream_t *stream,
396                            const svn_repos_parse_fns3_t *parse_fns,
397                            void *parse_baton,
398                            svn_boolean_t deltas_are_text,
399                            svn_cancel_func_t cancel_func,
400                            void *cancel_baton,
401                            apr_pool_t *pool)
402{
403  svn_boolean_t eof;
404  svn_stringbuf_t *linebuf;
405  void *rev_baton = NULL;
406  char *buffer = apr_palloc(pool, SVN__STREAM_CHUNK_SIZE);
407  apr_size_t buflen = SVN__STREAM_CHUNK_SIZE;
408  apr_pool_t *linepool = svn_pool_create(pool);
409  apr_pool_t *revpool = svn_pool_create(pool);
410  apr_pool_t *nodepool = svn_pool_create(pool);
411  int version;
412
413  SVN_ERR(svn_stream_readline(stream, &linebuf, "\n", &eof, linepool));
414  if (eof)
415    return stream_ran_dry();
416
417  /* The first two lines of the stream are the dumpfile-format version
418     number, and a blank line.  To preserve backward compatibility,
419     don't assume the existence of newer parser-vtable functions. */
420  SVN_ERR(parse_format_version(&version, linebuf->data));
421  if (parse_fns->magic_header_record != NULL)
422    SVN_ERR(parse_fns->magic_header_record(version, parse_baton, pool));
423
424  /* A dumpfile "record" is defined to be a header-block of
425     rfc822-style headers, possibly followed by a content-block.
426
427       - A header-block is always terminated by a single blank line (\n\n)
428
429       - We know whether the record has a content-block by looking for
430         a 'Content-length:' header.  The content-block will always be
431         of a specific length, plus an extra newline.
432
433     Once a record is fully sucked from the stream, an indeterminate
434     number of blank lines (or lines that begin with whitespace) may
435     follow before the next record (or the end of the stream.)
436  */
437
438  while (1)
439    {
440      apr_hash_t *headers;
441      void *node_baton;
442      svn_boolean_t found_node = FALSE;
443      svn_boolean_t old_v1_with_cl = FALSE;
444      const char *content_length;
445      const char *prop_cl;
446      const char *text_cl;
447      const char *value;
448      svn_filesize_t actual_prop_length;
449
450      /* Clear our per-line pool. */
451      svn_pool_clear(linepool);
452
453      /* Check for cancellation. */
454      if (cancel_func)
455        SVN_ERR(cancel_func(cancel_baton));
456
457      /* Keep reading blank lines until we discover a new record, or until
458         the stream runs out. */
459      SVN_ERR(svn_stream_readline(stream, &linebuf, "\n", &eof, linepool));
460
461      if (eof)
462        {
463          if (svn_stringbuf_isempty(linebuf))
464            break;   /* end of stream, go home. */
465          else
466            return stream_ran_dry();
467        }
468
469      if ((linebuf->len == 0) || (svn_ctype_isspace(linebuf->data[0])))
470        continue; /* empty line ... loop */
471
472      /*** Found the beginning of a new record. ***/
473
474      /* The last line we read better be a header of some sort.
475         Read the whole header-block into a hash. */
476      SVN_ERR(read_header_block(stream, linebuf, &headers, linepool));
477
478      /*** Handle the various header blocks. ***/
479
480      /* Is this a revision record? */
481      if (svn_hash_gets(headers, SVN_REPOS_DUMPFILE_REVISION_NUMBER))
482        {
483          /* If we already have a rev_baton open, we need to close it
484             and clear the per-revision subpool. */
485          if (rev_baton != NULL)
486            {
487              SVN_ERR(parse_fns->close_revision(rev_baton));
488              svn_pool_clear(revpool);
489            }
490
491          SVN_ERR(parse_fns->new_revision_record(&rev_baton,
492                                                 headers, parse_baton,
493                                                 revpool));
494        }
495      /* Or is this, perhaps, a node record? */
496      else if (svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_PATH))
497        {
498          SVN_ERR(parse_fns->new_node_record(&node_baton,
499                                             headers,
500                                             rev_baton,
501                                             nodepool));
502          found_node = TRUE;
503        }
504      /* Or is this the repos UUID? */
505      else if ((value = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_UUID)))
506        {
507          SVN_ERR(parse_fns->uuid_record(value, parse_baton, pool));
508        }
509      /* Or perhaps a dumpfile format? */
510      /* ### TODO: use parse_format_version */
511      else if ((value = svn_hash_gets(headers,
512                                      SVN_REPOS_DUMPFILE_MAGIC_HEADER)))
513        {
514          /* ### someday, switch modes of operation here. */
515          SVN_ERR(svn_cstring_atoi(&version, value));
516        }
517      /* Or is this bogosity?! */
518      else
519        {
520          /* What the heck is this record?!? */
521          return svn_error_create(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
522                                  _("Unrecognized record type in stream"));
523        }
524
525      /* Need 3 values below to determine v1 dump type
526
527         Old (pre 0.14?) v1 dumps don't have Prop-content-length
528         and Text-content-length fields, but always have a properties
529         block in a block with Content-Length > 0 */
530
531      content_length = svn_hash_gets(headers,
532                                     SVN_REPOS_DUMPFILE_CONTENT_LENGTH);
533      prop_cl = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_PROP_CONTENT_LENGTH);
534      text_cl = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_TEXT_CONTENT_LENGTH);
535      old_v1_with_cl =
536        version == 1 && content_length && ! prop_cl && ! text_cl;
537
538      /* Is there a props content-block to parse? */
539      if (prop_cl || old_v1_with_cl)
540        {
541          const char *delta = svn_hash_gets(headers,
542                                            SVN_REPOS_DUMPFILE_PROP_DELTA);
543          svn_boolean_t is_delta = (delta && strcmp(delta, "true") == 0);
544
545          /* First, remove all node properties, unless this is a delta
546             property block. */
547          if (found_node && !is_delta)
548            SVN_ERR(parse_fns->remove_node_props(node_baton));
549
550          SVN_ERR(parse_property_block
551                  (stream,
552                   svn__atoui64(prop_cl ? prop_cl : content_length),
553                   parse_fns,
554                   found_node ? node_baton : rev_baton,
555                   parse_baton,
556                   found_node,
557                   &actual_prop_length,
558                   found_node ? nodepool : revpool));
559        }
560
561      /* Is there a text content-block to parse? */
562      if (text_cl)
563        {
564          const char *delta = svn_hash_gets(headers,
565                                            SVN_REPOS_DUMPFILE_TEXT_DELTA);
566          svn_boolean_t is_delta = FALSE;
567          if (! deltas_are_text)
568            is_delta = (delta && strcmp(delta, "true") == 0);
569
570          SVN_ERR(parse_text_block(stream,
571                                   svn__atoui64(text_cl),
572                                   is_delta,
573                                   parse_fns,
574                                   found_node ? node_baton : rev_baton,
575                                   buffer,
576                                   buflen,
577                                   found_node ? nodepool : revpool));
578        }
579      else if (old_v1_with_cl)
580        {
581          /* An old-v1 block with a Content-length might have a text block.
582             If the property block did not consume all the bytes of the
583             Content-length, then it clearly does have a text block.
584             If not, then we must deduce whether we have an *empty* text
585             block or an *absent* text block.  The rules are:
586             - "Node-kind: file" blocks have an empty (i.e. present, but
587               zero-length) text block, since they represent a file
588               modification.  Note that file-copied-text-unmodified blocks
589               have no Content-length - even if they should have contained
590               a modified property block, the pre-0.14 dumper forgets to
591               dump the modified properties.
592             - If it is not a file node, then it is a revision or directory,
593               and so has an absent text block.
594          */
595          const char *node_kind;
596          svn_filesize_t cl_value = svn__atoui64(content_length)
597                                    - actual_prop_length;
598
599          if (cl_value ||
600              ((node_kind = svn_hash_gets(headers,
601                                          SVN_REPOS_DUMPFILE_NODE_KIND))
602               && strcmp(node_kind, "file") == 0)
603             )
604            SVN_ERR(parse_text_block(stream,
605                                     cl_value,
606                                     FALSE,
607                                     parse_fns,
608                                     found_node ? node_baton : rev_baton,
609                                     buffer,
610                                     buflen,
611                                     found_node ? nodepool : revpool));
612        }
613
614      /* if we have a content-length header, did we read all of it?
615         in case of an old v1, we *always* read all of it, because
616         text-content-length == content-length - prop-content-length
617      */
618      if (content_length && ! old_v1_with_cl)
619        {
620          apr_size_t rlen, num_to_read;
621          svn_filesize_t remaining =
622            svn__atoui64(content_length) -
623            (prop_cl ? svn__atoui64(prop_cl) : 0) -
624            (text_cl ? svn__atoui64(text_cl) : 0);
625
626
627          if (remaining < 0)
628            return svn_error_create(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
629                                    _("Sum of subblock sizes larger than "
630                                      "total block content length"));
631
632          /* Consume remaining bytes in this content block */
633          while (remaining > 0)
634            {
635              if (remaining >= (svn_filesize_t)buflen)
636                rlen = buflen;
637              else
638                rlen = (apr_size_t) remaining;
639
640              num_to_read = rlen;
641              SVN_ERR(svn_stream_read_full(stream, buffer, &rlen));
642              remaining -= rlen;
643              if (rlen != num_to_read)
644                return stream_ran_dry();
645            }
646        }
647
648      /* If we just finished processing a node record, we need to
649         close the node record and clear the per-node subpool. */
650      if (found_node)
651        {
652          SVN_ERR(parse_fns->close_node(node_baton));
653          svn_pool_clear(nodepool);
654        }
655
656      /*** End of processing for one record. ***/
657
658    } /* end of stream */
659
660  /* Close out whatever revision we're in. */
661  if (rev_baton != NULL)
662    SVN_ERR(parse_fns->close_revision(rev_baton));
663
664  svn_pool_destroy(linepool);
665  svn_pool_destroy(revpool);
666  svn_pool_destroy(nodepool);
667  return SVN_NO_ERROR;
668}
669