1/* Functions for dealing with sparse files
2
3   Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5   This program is free software; you can redistribute it and/or modify it
6   under the terms of the GNU General Public License as published by the
7   Free Software Foundation; either version 2, or (at your option) any later
8   version.
9
10   This program is distributed in the hope that it will be useful, but
11   WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
13   Public License for more details.
14
15   You should have received a copy of the GNU General Public License along
16   with this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18
19#include <system.h>
20#include <inttostr.h>
21#include <quotearg.h>
22#include "common.h"
23
/* Forward declarations: the operation table below takes pointers to the
   file descriptor structure, and tar_sparse_init needs the selector
   before its definition appears.  */
struct tar_sparse_file;
static bool sparse_select_optab (struct tar_sparse_file *file);

/* Phase indicator handed to the optional scan_block hook while a file
   is being scanned for holes.  */
enum sparse_scan_state
{
  scan_begin,                   /* Passed once, before any block is read */
  scan_block,                   /* Passed while blocks are being analyzed */
  scan_end                      /* Passed once, after the last block */
};
33
/* Table of format-specific operations.  Any slot may be NULL; the
   tar_sparse_* wrappers in this file then return a fixed result
   instead of calling the hook.  */
struct tar_sparse_optab
{
  /* Called by tar_sparse_init once the table has been selected.  */
  bool (*init) (struct tar_sparse_file *);

  /* Called by tar_sparse_done when processing of a member is over.  */
  bool (*done) (struct tar_sparse_file *);

  /* Tell whether the member being processed is a sparse file.  */
  bool (*sparse_member_p) (struct tar_sparse_file *);

  /* Write the member header carrying the sparse map.  */
  bool (*dump_header) (struct tar_sparse_file *);

  /* Adjust the size fields of the stat info after reading a header.  */
  bool (*fixup_header) (struct tar_sparse_file *);

  /* Convert the archived sparse map to the internal representation.  */
  bool (*decode_header) (struct tar_sparse_file *);

  /* Hook invoked while the file is scanned for holes; the last argument
     is the block just read, or NULL.  */
  bool (*scan_block) (struct tar_sparse_file *, enum sparse_scan_state,
                      void *);

  /* Write to the archive the data region with the given map index.  */
  bool (*dump_region) (struct tar_sparse_file *, size_t);

  /* Read from the archive the data region with the given map index.  */
  bool (*extract_region) (struct tar_sparse_file *, size_t);
};
47
/* State shared by all operations on one sparse archive member.  */
struct tar_sparse_file
{
  /* File descriptor */
  int fd;

  /* Is fd seekable? */
  bool seekable;

  /* Current offset in fd if seekable==false.  Otherwise unused */
  off_t offset;

  /* Number of bytes actually written to the archive */
  off_t dumped_size;

  /* Information about the file */
  struct tar_stat_info *stat_info;

  /* Operation table */
  struct tar_sparse_optab const *optab;

  /* Any additional data optab calls might require */
  void *closure;
};
61
62/* Dump zeros to file->fd until offset is reached. It is used instead of
63   lseek if the output file is not seekable */
64static bool
65dump_zeros (struct tar_sparse_file *file, off_t offset)
66{
67  static char const zero_buf[BLOCKSIZE];
68
69  if (offset < file->offset)
70    {
71      errno = EINVAL;
72      return false;
73    }
74
75  while (file->offset < offset)
76    {
77      size_t size = (BLOCKSIZE < offset - file->offset
78		     ? BLOCKSIZE
79		     : offset - file->offset);
80      ssize_t wrbytes;
81
82      wrbytes = write (file->fd, zero_buf, size);
83      if (wrbytes <= 0)
84	{
85	  if (wrbytes == 0)
86	    errno = EINVAL;
87	  return false;
88	}
89      file->offset += wrbytes;
90    }
91
92  return true;
93}
94
95static bool
96tar_sparse_member_p (struct tar_sparse_file *file)
97{
98  if (file->optab->sparse_member_p)
99    return file->optab->sparse_member_p (file);
100  return false;
101}
102
103static bool
104tar_sparse_init (struct tar_sparse_file *file)
105{
106  memset (file, 0, sizeof *file);
107
108  if (!sparse_select_optab (file))
109    return false;
110
111  if (file->optab->init)
112    return file->optab->init (file);
113
114  return true;
115}
116
117static bool
118tar_sparse_done (struct tar_sparse_file *file)
119{
120  if (file->optab->done)
121    return file->optab->done (file);
122  return true;
123}
124
125static bool
126tar_sparse_scan (struct tar_sparse_file *file, enum sparse_scan_state state,
127		 void *block)
128{
129  if (file->optab->scan_block)
130    return file->optab->scan_block (file, state, block);
131  return true;
132}
133
134static bool
135tar_sparse_dump_region (struct tar_sparse_file *file, size_t i)
136{
137  if (file->optab->dump_region)
138    return file->optab->dump_region (file, i);
139  return false;
140}
141
142static bool
143tar_sparse_extract_region (struct tar_sparse_file *file, size_t i)
144{
145  if (file->optab->extract_region)
146    return file->optab->extract_region (file, i);
147  return false;
148}
149
150static bool
151tar_sparse_dump_header (struct tar_sparse_file *file)
152{
153  if (file->optab->dump_header)
154    return file->optab->dump_header (file);
155  return false;
156}
157
158static bool
159tar_sparse_decode_header (struct tar_sparse_file *file)
160{
161  if (file->optab->decode_header)
162    return file->optab->decode_header (file);
163  return true;
164}
165
166static bool
167tar_sparse_fixup_header (struct tar_sparse_file *file)
168{
169  if (file->optab->fixup_header)
170    return file->optab->fixup_header (file);
171  return true;
172}
173
174
175static bool
176lseek_or_error (struct tar_sparse_file *file, off_t offset)
177{
178  if (file->seekable
179      ? lseek (file->fd, offset, SEEK_SET) < 0
180      : ! dump_zeros (file, offset))
181    {
182      seek_diag_details (file->stat_info->orig_file_name, offset);
183      return false;
184    }
185  return true;
186}
187
/* Return true if the SIZE bytes starting at BUFFER are all zero (an
   empty range qualifies); return false as soon as a nonzero byte,
   i.e., useful data, is found.  */
static bool
zero_block_p (char const *buffer, size_t size)
{
  char const *limit = buffer + size;
  char const *p;

  for (p = buffer; p != limit; p++)
    if (*p != '\0')
      return false;
  return true;
}
199
200static void
201sparse_add_map (struct tar_stat_info *st, struct sp_array const *sp)
202{
203  struct sp_array *sparse_map = st->sparse_map;
204  size_t avail = st->sparse_map_avail;
205  if (avail == st->sparse_map_size)
206    st->sparse_map = sparse_map =
207      x2nrealloc (sparse_map, &st->sparse_map_size, sizeof *sparse_map);
208  sparse_map[avail] = *sp;
209  st->sparse_map_avail = avail + 1;
210}
211
212/* Scan the sparse file and create its map */
213static bool
214sparse_scan_file (struct tar_sparse_file *file)
215{
216  struct tar_stat_info *st = file->stat_info;
217  int fd = file->fd;
218  char buffer[BLOCKSIZE];
219  size_t count;
220  off_t offset = 0;
221  struct sp_array sp = {0, 0};
222
223  if (!lseek_or_error (file, 0))
224    return false;
225
226  st->archive_file_size = 0;
227
228  if (!tar_sparse_scan (file, scan_begin, NULL))
229    return false;
230
231  while ((count = safe_read (fd, buffer, sizeof buffer)) != 0
232	 && count != SAFE_READ_ERROR)
233    {
234      /* Analyze the block.  */
235      if (zero_block_p (buffer, count))
236	{
237	  if (sp.numbytes)
238	    {
239	      sparse_add_map (st, &sp);
240	      sp.numbytes = 0;
241	      if (!tar_sparse_scan (file, scan_block, NULL))
242		return false;
243	    }
244	}
245      else
246	{
247	  if (sp.numbytes == 0)
248	    sp.offset = offset;
249	  sp.numbytes += count;
250	  st->archive_file_size += count;
251	  if (!tar_sparse_scan (file, scan_block, buffer))
252	    return false;
253	}
254
255      offset += count;
256    }
257
258  if (sp.numbytes == 0)
259    sp.offset = offset;
260
261  sparse_add_map (st, &sp);
262  st->archive_file_size += count;
263  return tar_sparse_scan (file, scan_end, NULL);
264}
265
266static struct tar_sparse_optab const oldgnu_optab;
267static struct tar_sparse_optab const star_optab;
268static struct tar_sparse_optab const pax_optab;
269
270static bool
271sparse_select_optab (struct tar_sparse_file *file)
272{
273  switch (current_format == DEFAULT_FORMAT ? archive_format : current_format)
274    {
275    case V7_FORMAT:
276    case USTAR_FORMAT:
277      return false;
278
279    case OLDGNU_FORMAT:
280    case GNU_FORMAT: /*FIXME: This one should disappear? */
281      file->optab = &oldgnu_optab;
282      break;
283
284    case POSIX_FORMAT:
285      file->optab = &pax_optab;
286      break;
287
288    case STAR_FORMAT:
289      file->optab = &star_optab;
290      break;
291
292    default:
293      return false;
294    }
295  return true;
296}
297
298static bool
299sparse_dump_region (struct tar_sparse_file *file, size_t i)
300{
301  union block *blk;
302  off_t bytes_left = file->stat_info->sparse_map[i].numbytes;
303
304  if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset))
305    return false;
306
307  while (bytes_left > 0)
308    {
309      size_t bufsize = (bytes_left > BLOCKSIZE) ? BLOCKSIZE : bytes_left;
310      size_t bytes_read;
311
312      blk = find_next_block ();
313      bytes_read = safe_read (file->fd, blk->buffer, bufsize);
314      if (bytes_read == SAFE_READ_ERROR)
315	{
316          read_diag_details (file->stat_info->orig_file_name,
317	                     (file->stat_info->sparse_map[i].offset
318			      + file->stat_info->sparse_map[i].numbytes
319			      - bytes_left),
320			     bufsize);
321	  return false;
322	}
323
324      memset (blk->buffer + bytes_read, 0, BLOCKSIZE - bytes_read);
325      bytes_left -= bytes_read;
326      file->dumped_size += bytes_read;
327      mv_size_left (file->stat_info->archive_file_size - file->dumped_size);
328      set_next_block_after (blk);
329    }
330
331  return true;
332}
333
334static bool
335sparse_extract_region (struct tar_sparse_file *file, size_t i)
336{
337  size_t write_size;
338
339  if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset))
340    return false;
341
342  write_size = file->stat_info->sparse_map[i].numbytes;
343
344  if (write_size == 0)
345    {
346      /* Last block of the file is a hole */
347      if (file->seekable && sys_truncate (file->fd))
348	truncate_warn (file->stat_info->orig_file_name);
349    }
350  else while (write_size > 0)
351    {
352      size_t count;
353      size_t wrbytes = (write_size > BLOCKSIZE) ? BLOCKSIZE : write_size;
354      union block *blk = find_next_block ();
355      if (!blk)
356	{
357	  ERROR ((0, 0, _("Unexpected EOF in archive")));
358	  return false;
359	}
360      set_next_block_after (blk);
361      count = full_write (file->fd, blk->buffer, wrbytes);
362      write_size -= count;
363      file->dumped_size += count;
364      mv_size_left (file->stat_info->archive_file_size - file->dumped_size);
365      file->offset += count;
366      if (count != wrbytes)
367	{
368	  write_error_details (file->stat_info->orig_file_name,
369			       count, wrbytes);
370	  return false;
371	}
372    }
373  return true;
374}
375
376
377
378/* Interface functions */
379enum dump_status
380sparse_dump_file (int fd, struct tar_stat_info *st)
381{
382  bool rc;
383  struct tar_sparse_file file;
384
385  if (!tar_sparse_init (&file))
386    return dump_status_not_implemented;
387
388  file.stat_info = st;
389  file.fd = fd;
390  file.seekable = true; /* File *must* be seekable for dump to work */
391
392  rc = sparse_scan_file (&file);
393  if (rc && file.optab->dump_region)
394    {
395      tar_sparse_dump_header (&file);
396
397      if (fd >= 0)
398	{
399	  size_t i;
400
401	  mv_begin (file.stat_info);
402	  for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
403	    rc = tar_sparse_dump_region (&file, i);
404	  mv_end ();
405	}
406    }
407
408  pad_archive (file.stat_info->archive_file_size - file.dumped_size);
409  return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
410}
411
412bool
413sparse_member_p (struct tar_stat_info *st)
414{
415  struct tar_sparse_file file;
416
417  if (!tar_sparse_init (&file))
418    return false;
419  file.stat_info = st;
420  return tar_sparse_member_p (&file);
421}
422
423bool
424sparse_fixup_header (struct tar_stat_info *st)
425{
426  struct tar_sparse_file file;
427
428  if (!tar_sparse_init (&file))
429    return false;
430  file.stat_info = st;
431  return tar_sparse_fixup_header (&file);
432}
433
434enum dump_status
435sparse_extract_file (int fd, struct tar_stat_info *st, off_t *size)
436{
437  bool rc = true;
438  struct tar_sparse_file file;
439  size_t i;
440
441  if (!tar_sparse_init (&file))
442    return dump_status_not_implemented;
443
444  file.stat_info = st;
445  file.fd = fd;
446  file.seekable = lseek (fd, 0, SEEK_SET) == 0;
447  file.offset = 0;
448
449  rc = tar_sparse_decode_header (&file);
450  for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
451    rc = tar_sparse_extract_region (&file, i);
452  *size = file.stat_info->archive_file_size - file.dumped_size;
453  return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
454}
455
456enum dump_status
457sparse_skip_file (struct tar_stat_info *st)
458{
459  bool rc = true;
460  struct tar_sparse_file file;
461
462  if (!tar_sparse_init (&file))
463    return dump_status_not_implemented;
464
465  file.stat_info = st;
466  file.fd = -1;
467
468  rc = tar_sparse_decode_header (&file);
469  skip_file (file.stat_info->archive_file_size - file.dumped_size);
470  return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
471}
472
473
474static bool
475check_sparse_region (struct tar_sparse_file *file, off_t beg, off_t end)
476{
477  if (!lseek_or_error (file, beg))
478    return false;
479
480  while (beg < end)
481    {
482      size_t bytes_read;
483      size_t rdsize = BLOCKSIZE < end - beg ? BLOCKSIZE : end - beg;
484      char diff_buffer[BLOCKSIZE];
485
486      bytes_read = safe_read (file->fd, diff_buffer, rdsize);
487      if (bytes_read == SAFE_READ_ERROR)
488	{
489          read_diag_details (file->stat_info->orig_file_name,
490	                     beg,
491			     rdsize);
492	  return false;
493	}
494      if (!zero_block_p (diff_buffer, bytes_read))
495	{
496	  char begbuf[INT_BUFSIZE_BOUND (off_t)];
497 	  report_difference (file->stat_info,
498			     _("File fragment at %s is not a hole"),
499			     offtostr (beg, begbuf));
500	  return false;
501	}
502
503      beg += bytes_read;
504    }
505  return true;
506}
507
508static bool
509check_data_region (struct tar_sparse_file *file, size_t i)
510{
511  size_t size_left;
512
513  if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset))
514    return false;
515  size_left = file->stat_info->sparse_map[i].numbytes;
516  mv_size_left (file->stat_info->archive_file_size - file->dumped_size);
517
518  while (size_left > 0)
519    {
520      size_t bytes_read;
521      size_t rdsize = (size_left > BLOCKSIZE) ? BLOCKSIZE : size_left;
522      char diff_buffer[BLOCKSIZE];
523
524      union block *blk = find_next_block ();
525      if (!blk)
526	{
527	  ERROR ((0, 0, _("Unexpected EOF in archive")));
528	  return false;
529	}
530      set_next_block_after (blk);
531      bytes_read = safe_read (file->fd, diff_buffer, rdsize);
532      if (bytes_read == SAFE_READ_ERROR)
533	{
534          read_diag_details (file->stat_info->orig_file_name,
535			     (file->stat_info->sparse_map[i].offset
536			      + file->stat_info->sparse_map[i].numbytes
537			      - size_left),
538			     rdsize);
539	  return false;
540	}
541      file->dumped_size += bytes_read;
542      size_left -= bytes_read;
543      mv_size_left (file->stat_info->archive_file_size - file->dumped_size);
544      if (memcmp (blk->buffer, diff_buffer, rdsize))
545	{
546	  report_difference (file->stat_info, _("Contents differ"));
547	  return false;
548	}
549    }
550  return true;
551}
552
553bool
554sparse_diff_file (int fd, struct tar_stat_info *st)
555{
556  bool rc = true;
557  struct tar_sparse_file file;
558  size_t i;
559  off_t offset = 0;
560
561  if (!tar_sparse_init (&file))
562    return dump_status_not_implemented;
563
564  file.stat_info = st;
565  file.fd = fd;
566  file.seekable = true; /* File *must* be seekable for compare to work */
567
568  rc = tar_sparse_decode_header (&file);
569  mv_begin (st);
570  for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
571    {
572      rc = check_sparse_region (&file,
573				offset, file.stat_info->sparse_map[i].offset)
574	    && check_data_region (&file, i);
575      offset = file.stat_info->sparse_map[i].offset
576	        + file.stat_info->sparse_map[i].numbytes;
577    }
578
579  if (!rc)
580    skip_file (file.stat_info->archive_file_size - file.dumped_size);
581  mv_end ();
582
583  tar_sparse_done (&file);
584  return rc;
585}
586
587
588/* Old GNU Format. The sparse file information is stored in the
589   oldgnu_header in the following manner:
590
591   The header is marked with type 'S'. Its `size' field contains
592   the cumulative size of all non-empty blocks of the file. The
593   actual file size is stored in `realsize' member of oldgnu_header.
594
595   The map of the file is stored in a list of `struct sparse'.
596   Each struct contains offset to the block of data and its
597   size (both as octal numbers). The first file header contains
598   at most 4 such structs (SPARSES_IN_OLDGNU_HEADER). If the map
599   contains more structs, then the field `isextended' of the main
600   header is set to 1 (binary) and the `struct sparse_header'
601   header follows, containing at most 21 following structs
602   (SPARSES_IN_SPARSE_HEADER). If more structs follow, `isextended'
   field of the extended header is set and the next extension header
604   follows, etc... */
605
/* Outcome of adding one archived map entry to the in-memory map.  */
enum oldgnu_add_status
{
  add_ok,                       /* Entry added; more may follow */
  add_finish,                   /* Unused entry met; the map is complete */
  add_fail                      /* Entry rejected as invalid */
};
612
613static bool
614oldgnu_sparse_member_p (struct tar_sparse_file *file __attribute__ ((unused)))
615{
616  return current_header->header.typeflag == GNUTYPE_SPARSE;
617}
618
619/* Add a sparse item to the sparse file and its obstack */
620static enum oldgnu_add_status
621oldgnu_add_sparse (struct tar_sparse_file *file, struct sparse *s)
622{
623  struct sp_array sp;
624
625  if (s->numbytes[0] == '\0')
626    return add_finish;
627  sp.offset = OFF_FROM_HEADER (s->offset);
628  sp.numbytes = SIZE_FROM_HEADER (s->numbytes);
629  if (sp.offset < 0
630      || file->stat_info->stat.st_size < sp.offset + sp.numbytes
631      || file->stat_info->archive_file_size < 0)
632    return add_fail;
633
634  sparse_add_map (file->stat_info, &sp);
635  return add_ok;
636}
637
638static bool
639oldgnu_fixup_header (struct tar_sparse_file *file)
640{
641  /* NOTE! st_size was initialized from the header
642     which actually contains archived size. The following fixes it */
643  file->stat_info->archive_file_size = file->stat_info->stat.st_size;
644  file->stat_info->stat.st_size =
645    OFF_FROM_HEADER (current_header->oldgnu_header.realsize);
646  return true;
647}
648
649/* Convert old GNU format sparse data to internal representation */
650static bool
651oldgnu_get_sparse_info (struct tar_sparse_file *file)
652{
653  size_t i;
654  union block *h = current_header;
655  int ext_p;
656  enum oldgnu_add_status rc;
657
658  file->stat_info->sparse_map_avail = 0;
659  for (i = 0; i < SPARSES_IN_OLDGNU_HEADER; i++)
660    {
661      rc = oldgnu_add_sparse (file, &h->oldgnu_header.sp[i]);
662      if (rc != add_ok)
663	break;
664    }
665
666  for (ext_p = h->oldgnu_header.isextended;
667       rc == add_ok && ext_p; ext_p = h->sparse_header.isextended)
668    {
669      h = find_next_block ();
670      if (!h)
671	{
672	  ERROR ((0, 0, _("Unexpected EOF in archive")));
673	  return false;
674	}
675      set_next_block_after (h);
676      for (i = 0; i < SPARSES_IN_SPARSE_HEADER && rc == add_ok; i++)
677	rc = oldgnu_add_sparse (file, &h->sparse_header.sp[i]);
678    }
679
680  if (rc == add_fail)
681    {
682      ERROR ((0, 0, _("%s: invalid sparse archive member"),
683	      file->stat_info->orig_file_name));
684      return false;
685    }
686  return true;
687}
688
689static void
690oldgnu_store_sparse_info (struct tar_sparse_file *file, size_t *pindex,
691			  struct sparse *sp, size_t sparse_size)
692{
693  for (; *pindex < file->stat_info->sparse_map_avail
694	 && sparse_size > 0; sparse_size--, sp++, ++*pindex)
695    {
696      OFF_TO_CHARS (file->stat_info->sparse_map[*pindex].offset,
697		    sp->offset);
698      SIZE_TO_CHARS (file->stat_info->sparse_map[*pindex].numbytes,
699		     sp->numbytes);
700    }
701}
702
703static bool
704oldgnu_dump_header (struct tar_sparse_file *file)
705{
706  off_t block_ordinal = current_block_ordinal ();
707  union block *blk;
708  size_t i;
709
710  blk = start_header (file->stat_info);
711  blk->header.typeflag = GNUTYPE_SPARSE;
712  if (file->stat_info->sparse_map_avail > SPARSES_IN_OLDGNU_HEADER)
713    blk->oldgnu_header.isextended = 1;
714
715  /* Store the real file size */
716  OFF_TO_CHARS (file->stat_info->stat.st_size, blk->oldgnu_header.realsize);
717  /* Store the effective (shrunken) file size */
718  OFF_TO_CHARS (file->stat_info->archive_file_size, blk->header.size);
719
720  i = 0;
721  oldgnu_store_sparse_info (file, &i,
722			    blk->oldgnu_header.sp,
723			    SPARSES_IN_OLDGNU_HEADER);
724  blk->oldgnu_header.isextended = i < file->stat_info->sparse_map_avail;
725  finish_header (file->stat_info, blk, block_ordinal);
726
727  while (i < file->stat_info->sparse_map_avail)
728    {
729      blk = find_next_block ();
730      memset (blk->buffer, 0, BLOCKSIZE);
731      oldgnu_store_sparse_info (file, &i,
732				blk->sparse_header.sp,
733				SPARSES_IN_SPARSE_HEADER);
734      if (i < file->stat_info->sparse_map_avail)
735	blk->sparse_header.isextended = 1;
736      set_next_block_after (blk);
737    }
738  return true;
739}
740
741static struct tar_sparse_optab const oldgnu_optab = {
742  NULL,  /* No init function */
743  NULL,  /* No done function */
744  oldgnu_sparse_member_p,
745  oldgnu_dump_header,
746  oldgnu_fixup_header,
747  oldgnu_get_sparse_info,
748  NULL,  /* No scan_block function */
749  sparse_dump_region,
750  sparse_extract_region,
751};
752
753
754/* Star */
755
756static bool
757star_sparse_member_p (struct tar_sparse_file *file __attribute__ ((unused)))
758{
759  return current_header->header.typeflag == GNUTYPE_SPARSE;
760}
761
762static bool
763star_fixup_header (struct tar_sparse_file *file)
764{
765  /* NOTE! st_size was initialized from the header
766     which actually contains archived size. The following fixes it */
767  file->stat_info->archive_file_size = file->stat_info->stat.st_size;
768  file->stat_info->stat.st_size =
769            OFF_FROM_HEADER (current_header->star_in_header.realsize);
770  return true;
771}
772
773/* Convert STAR format sparse data to internal representation */
774static bool
775star_get_sparse_info (struct tar_sparse_file *file)
776{
777  size_t i;
778  union block *h = current_header;
779  int ext_p;
780  enum oldgnu_add_status rc = add_ok;
781
782  file->stat_info->sparse_map_avail = 0;
783
784  if (h->star_in_header.prefix[0] == '\0'
785      && h->star_in_header.sp[0].offset[10] != '\0')
786    {
787      /* Old star format */
788      for (i = 0; i < SPARSES_IN_STAR_HEADER; i++)
789	{
790	  rc = oldgnu_add_sparse (file, &h->star_in_header.sp[i]);
791	  if (rc != add_ok)
792	    break;
793	}
794      ext_p = h->star_in_header.isextended;
795    }
796  else
797    ext_p = 1;
798
799  for (; rc == add_ok && ext_p; ext_p = h->star_ext_header.isextended)
800    {
801      h = find_next_block ();
802      if (!h)
803	{
804	  ERROR ((0, 0, _("Unexpected EOF in archive")));
805	  return false;
806	}
807      set_next_block_after (h);
808      for (i = 0; i < SPARSES_IN_STAR_EXT_HEADER && rc == add_ok; i++)
809	rc = oldgnu_add_sparse (file, &h->star_ext_header.sp[i]);
810    }
811
812  if (rc == add_fail)
813    {
814      ERROR ((0, 0, _("%s: invalid sparse archive member"),
815	      file->stat_info->orig_file_name));
816      return false;
817    }
818  return true;
819}
820
821
822static struct tar_sparse_optab const star_optab = {
823  NULL,  /* No init function */
824  NULL,  /* No done function */
825  star_sparse_member_p,
826  NULL,
827  star_fixup_header,
828  star_get_sparse_info,
829  NULL,  /* No scan_block function */
830  NULL, /* No dump region function */
831  sparse_extract_region,
832};
833
834
835/* GNU PAX sparse file format. There are several versions:
836
837   * 0.0
838
839   The initial version of sparse format used by tar 1.14-1.15.1.
840   The sparse file map is stored in x header:
841
842   GNU.sparse.size      Real size of the stored file
843   GNU.sparse.numblocks Number of blocks in the sparse map
844   repeat numblocks time
845     GNU.sparse.offset    Offset of the next data block
846     GNU.sparse.numbytes  Size of the next data block
847   end repeat
848
849   This has been reported as conflicting with the POSIX specs. The reason is
850   that offsets and sizes of non-zero data blocks were stored in multiple
851   instances of GNU.sparse.offset/GNU.sparse.numbytes variables, whereas
852   POSIX requires the latest occurrence of the variable to override all
853   previous occurrences.
854
855   To avoid this incompatibility two following versions were introduced.
856
857   * 0.1
858
859   Used by tar 1.15.2 -- 1.15.91 (alpha releases).
860
861   The sparse file map is stored in
862   x header:
863
864   GNU.sparse.size      Real size of the stored file
865   GNU.sparse.numblocks Number of blocks in the sparse map
866   GNU.sparse.map       Map of non-null data chunks. A string consisting
867                       of comma-separated values "offset,size[,offset,size]..."
868
869   The resulting GNU.sparse.map string can be *very* long. While POSIX does not
870   impose any limit on the length of a x header variable, this can confuse some
871   tars.
872
873   * 1.0
874
875   Starting from this version, the exact sparse format version is specified
   explicitly in the header using the following variables:
877
878   GNU.sparse.major     Major version
879   GNU.sparse.minor     Minor version
880
881   X header keeps the following variables:
882
883   GNU.sparse.name      Real file name of the sparse file
884   GNU.sparse.realsize  Real size of the stored file (corresponds to the old
885                        GNU.sparse.size variable)
886
887   The name field of the ustar header is constructed using the pattern
888   "%d/GNUSparseFile.%p/%f".
889
890   The sparse map itself is stored in the file data block, preceding the actual
891   file data. It consists of a series of octal numbers of arbitrary length,
892   delimited by newlines. The map is padded with nulls to the nearest block
893   boundary.
894
895   The first number gives the number of entries in the map. Following are map
896   entries, each one consisting of two numbers giving the offset and size of
897   the data block it describes.
898
899   The format is designed in such a way that non-posix aware tars and tars not
900   supporting GNU.sparse.* keywords will extract each sparse file in its
901   condensed form with the file map attached and will place it into a separate
902   directory. Then, using a simple program it would be possible to expand the
903   file to its original form even without GNU tar.
904
   By default, v.1.0 archives are created. To use other formats,
906   --sparse-version option is provided. Additionally, v.0.0 can be obtained
907   by deleting GNU.sparse.map from 0.1 format: --sparse-version 0.1
908   --pax-option delete=GNU.sparse.map
909*/
910
911static bool
912pax_sparse_member_p (struct tar_sparse_file *file)
913{
914  return file->stat_info->sparse_map_avail > 0
915          || file->stat_info->sparse_major > 0;
916}
917
918static bool
919pax_dump_header_0 (struct tar_sparse_file *file)
920{
921  off_t block_ordinal = current_block_ordinal ();
922  union block *blk;
923  size_t i;
924  char nbuf[UINTMAX_STRSIZE_BOUND];
925  struct sp_array *map = file->stat_info->sparse_map;
926  char *save_file_name = NULL;
927
928  /* Store the real file size */
929  xheader_store ("GNU.sparse.size", file->stat_info, NULL);
930  xheader_store ("GNU.sparse.numblocks", file->stat_info, NULL);
931
932  if (xheader_keyword_deleted_p ("GNU.sparse.map")
933      || tar_sparse_minor == 0)
934    {
935      for (i = 0; i < file->stat_info->sparse_map_avail; i++)
936	{
937	  xheader_store ("GNU.sparse.offset", file->stat_info, &i);
938	  xheader_store ("GNU.sparse.numbytes", file->stat_info, &i);
939	}
940    }
941  else
942    {
943      xheader_store ("GNU.sparse.name", file->stat_info, NULL);
944      save_file_name = file->stat_info->file_name;
945      file->stat_info->file_name = xheader_format_name (file->stat_info,
946					       "%d/GNUSparseFile.%p/%f", 0);
947
948      xheader_string_begin (&file->stat_info->xhdr);
949      for (i = 0; i < file->stat_info->sparse_map_avail; i++)
950	{
951	  if (i)
952	    xheader_string_add (&file->stat_info->xhdr, ",");
953	  xheader_string_add (&file->stat_info->xhdr,
954			      umaxtostr (map[i].offset, nbuf));
955	  xheader_string_add (&file->stat_info->xhdr, ",");
956	  xheader_string_add (&file->stat_info->xhdr,
957			      umaxtostr (map[i].numbytes, nbuf));
958	}
959      if (!xheader_string_end (&file->stat_info->xhdr,
960			       "GNU.sparse.map"))
961	{
962	  free (file->stat_info->file_name);
963	  file->stat_info->file_name = save_file_name;
964	  return false;
965	}
966    }
967  blk = start_header (file->stat_info);
968  /* Store the effective (shrunken) file size */
969  OFF_TO_CHARS (file->stat_info->archive_file_size, blk->header.size);
970  finish_header (file->stat_info, blk, block_ordinal);
971  if (save_file_name)
972    {
973      free (file->stat_info->file_name);
974      file->stat_info->file_name = save_file_name;
975    }
976  return true;
977}
978
979static bool
980pax_dump_header_1 (struct tar_sparse_file *file)
981{
982  off_t block_ordinal = current_block_ordinal ();
983  union block *blk;
984  char *p, *q;
985  size_t i;
986  char nbuf[UINTMAX_STRSIZE_BOUND];
987  off_t size = 0;
988  struct sp_array *map = file->stat_info->sparse_map;
989  char *save_file_name = file->stat_info->file_name;
990
991#define COPY_STRING(b,dst,src) do                \
992 {                                               \
993   char *endp = b->buffer + BLOCKSIZE;           \
994   char *srcp = src;                             \
995   while (*srcp)                                 \
996     {                                           \
997       if (dst == endp)                          \
998	 {                                       \
999	   set_next_block_after (b);             \
1000	   b = find_next_block ();               \
1001           dst = b->buffer;                      \
1002	   endp = b->buffer + BLOCKSIZE;         \
1003	 }                                       \
1004       *dst++ = *srcp++;                         \
1005     }                                           \
1006   } while (0)
1007
1008  /* Compute stored file size */
1009  p = umaxtostr (file->stat_info->sparse_map_avail, nbuf);
1010  size += strlen (p) + 1;
1011  for (i = 0; i < file->stat_info->sparse_map_avail; i++)
1012    {
1013      p = umaxtostr (map[i].offset, nbuf);
1014      size += strlen (p) + 1;
1015      p = umaxtostr (map[i].numbytes, nbuf);
1016      size += strlen (p) + 1;
1017    }
1018  size = (size + BLOCKSIZE - 1) / BLOCKSIZE;
1019  file->stat_info->archive_file_size += size * BLOCKSIZE;
1020  file->dumped_size += size * BLOCKSIZE;
1021
1022  /* Store sparse file identification */
1023  xheader_store ("GNU.sparse.major", file->stat_info, NULL);
1024  xheader_store ("GNU.sparse.minor", file->stat_info, NULL);
1025  xheader_store ("GNU.sparse.name", file->stat_info, NULL);
1026  xheader_store ("GNU.sparse.realsize", file->stat_info, NULL);
1027
1028  file->stat_info->file_name = xheader_format_name (file->stat_info,
1029					    "%d/GNUSparseFile.%p/%f", 0);
1030
1031  blk = start_header (file->stat_info);
1032  /* Store the effective (shrunken) file size */
1033  OFF_TO_CHARS (file->stat_info->archive_file_size, blk->header.size);
1034  finish_header (file->stat_info, blk, block_ordinal);
1035  free (file->stat_info->file_name);
1036  file->stat_info->file_name = save_file_name;
1037
1038  blk = find_next_block ();
1039  q = blk->buffer;
1040  p = umaxtostr (file->stat_info->sparse_map_avail, nbuf);
1041  COPY_STRING (blk, q, p);
1042  COPY_STRING (blk, q, "\n");
1043  for (i = 0; i < file->stat_info->sparse_map_avail; i++)
1044    {
1045      p = umaxtostr (map[i].offset, nbuf);
1046      COPY_STRING (blk, q, p);
1047      COPY_STRING (blk, q, "\n");
1048      p = umaxtostr (map[i].numbytes, nbuf);
1049      COPY_STRING (blk, q, p);
1050      COPY_STRING (blk, q, "\n");
1051    }
1052  memset (q, 0, BLOCKSIZE - (q - blk->buffer));
1053  set_next_block_after (blk);
1054  return true;
1055}
1056
1057static bool
1058pax_dump_header (struct tar_sparse_file *file)
1059{
1060  file->stat_info->sparse_major = tar_sparse_major;
1061  file->stat_info->sparse_minor = tar_sparse_minor;
1062
1063  return (file->stat_info->sparse_major == 0) ?
1064           pax_dump_header_0 (file) : pax_dump_header_1 (file);
1065}
1066
/* Parse the unsigned decimal number held in the NUL-terminated string ARG.

   ARG must begin with a digit (as tested by ISDIGIT), which rules out
   the leading white space and sign that strtoumax would otherwise
   accept, and the number must extend to the end of the string.

   On success store the value in *NUM and return true.  Return false,
   leaving *NUM untouched, if ARG is malformed, if the value is greater
   than MAXVAL, or if strtoumax reports an out-of-range value.

   errno is set to 0 before the conversion, so callers must not rely on
   it being preserved across this call.  */
static bool
decode_num (uintmax_t *num, char const *arg, uintmax_t maxval)
{
  uintmax_t u;
  char *arg_lim;

  if (!ISDIGIT (*arg))
    return false;

  /* strtoumax never clears errno on success.  Reset it here so that an
     ERANGE left behind by some earlier call is not mistaken for an
     overflow of this number.  */
  errno = 0;
  u = strtoumax (arg, &arg_lim, 10);

  if (! (u <= maxval && errno != ERANGE) || *arg_lim)
    return false;

  *num = u;
  return true;
}
1084
1085static bool
1086pax_decode_header (struct tar_sparse_file *file)
1087{
1088  if (file->stat_info->sparse_major > 0)
1089    {
1090      uintmax_t u;
1091      char nbuf[UINTMAX_STRSIZE_BOUND];
1092      union block *blk;
1093      char *p;
1094      size_t i;
1095
1096#define COPY_BUF(b,buf,src) do                                     \
1097 {                                                                 \
1098   char *endp = b->buffer + BLOCKSIZE;                             \
1099   char *dst = buf;                                                \
1100   do                                                              \
1101     {                                                             \
1102       if (dst == buf + UINTMAX_STRSIZE_BOUND -1)                  \
1103         {                                                         \
1104           ERROR ((0, 0, _("%s: numeric overflow in sparse archive member"), \
1105	          file->stat_info->orig_file_name));               \
1106           return false;                                           \
1107         }                                                         \
1108       if (src == endp)                                            \
1109	 {                                                         \
1110	   set_next_block_after (b);                               \
1111           file->dumped_size += BLOCKSIZE;                         \
1112           b = find_next_block ();                                 \
1113           src = b->buffer;                                        \
1114	   endp = b->buffer + BLOCKSIZE;                           \
1115	 }                                                         \
1116       *dst = *src++;                                              \
1117     }                                                             \
1118   while (*dst++ != '\n');                                         \
1119   dst[-1] = 0;                                                    \
1120 } while (0)
1121
1122      set_next_block_after (current_header);
1123      file->dumped_size += BLOCKSIZE;
1124      blk = find_next_block ();
1125      p = blk->buffer;
1126      COPY_BUF (blk,nbuf,p);
1127      if (!decode_num (&u, nbuf, TYPE_MAXIMUM (size_t)))
1128	{
1129	  ERROR ((0, 0, _("%s: malformed sparse archive member"),
1130		  file->stat_info->orig_file_name));
1131	  return false;
1132	}
1133      file->stat_info->sparse_map_size = u;
1134      file->stat_info->sparse_map = xcalloc (file->stat_info->sparse_map_size,
1135					     sizeof (*file->stat_info->sparse_map));
1136      file->stat_info->sparse_map_avail = 0;
1137      for (i = 0; i < file->stat_info->sparse_map_size; i++)
1138	{
1139	  struct sp_array sp;
1140
1141	  COPY_BUF (blk,nbuf,p);
1142	  if (!decode_num (&u, nbuf, TYPE_MAXIMUM (off_t)))
1143	    {
1144	      ERROR ((0, 0, _("%s: malformed sparse archive member"),
1145		      file->stat_info->orig_file_name));
1146	      return false;
1147	    }
1148	  sp.offset = u;
1149	  COPY_BUF (blk,nbuf,p);
1150	  if (!decode_num (&u, nbuf, TYPE_MAXIMUM (size_t)))
1151	    {
1152	      ERROR ((0, 0, _("%s: malformed sparse archive member"),
1153		      file->stat_info->orig_file_name));
1154	      return false;
1155	    }
1156	  sp.numbytes = u;
1157	  sparse_add_map (file->stat_info, &sp);
1158	}
1159      set_next_block_after (blk);
1160    }
1161
1162  return true;
1163}
1164
1165static struct tar_sparse_optab const pax_optab = {
1166  NULL,  /* No init function */
1167  NULL,  /* No done function */
1168  pax_sparse_member_p,
1169  pax_dump_header,
1170  NULL,
1171  pax_decode_header,
1172  NULL,  /* No scan_block function */
1173  sparse_dump_region,
1174  sparse_extract_region,
1175};
1176