1/* Copyright (C) 2021 Free Software Foundation, Inc.
2   Contributed by Oracle.
3
4   This file is part of GNU Binutils.
5
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 3, or (at your option)
9   any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program; if not, write to the Free Software
18   Foundation, 51 Franklin Street - Fifth Floor, Boston,
19   MA 02110-1301, USA.  */
20
21#include "config.h"
22#include <sys/types.h>
23#include <fcntl.h>
24#include <unistd.h>
25#include <errno.h>
26
27#include "zlib.h"
28#include "util.h"
29#include "DbeJarFile.h"
30#include "Data_window.h"
31#include "vec.h"
32
/* Read the single unsigned byte at B and widen it to 32 bits.  */
static uint32_t
get_u1 (unsigned char *b)
{
  uint32_t value = *b;
  return value;
}
38
39static uint32_t
40get_u2 (unsigned char *b)
41{
42  return (get_u1 (b + 1) << 8) | get_u1 (b);
43}
44
45static uint32_t
46get_u4 (unsigned char *b)
47{
48  return (get_u2 (b + 2) << 16) | get_u2 (b);
49}
50
51static uint64_t
52get_u8 (unsigned char *b)
53{
54  return (((uint64_t) get_u4 (b + 4)) << 32) | get_u4 (b);
55}
56
/* Sizes, in bytes, of the fixed-length parts of the zip records parsed in
   this file, plus the size of the file tail that is searched for the
   end-of-central-directory record.  */
enum
{
  END_CENT_DIR_SIZE = 22,	// end of central directory record, comment excluded
  LOC_FILE_HEADER_SIZE = 30,	// local file header, name and extra field excluded
  CENT_FILE_HEADER_SIZE = 46,	// central file header, name/extra/comment excluded
  ZIP64_LOCATOR_SIZE = 20,	// Zip64 end of central directory locator
  ZIP64_CENT_DIR_SIZE = 56,	// Zip64 end of central directory record, extensible data excluded
  ZIP_BUF_SIZE = 65536		// upper bound on the file tail scanned by get_EndCentDir ()
};
66
/* Description of the central directory, as filled in by
   DbeJarFile::get_EndCentDir () from the (Zip64) end of central
   directory record.  */
struct EndCentDir
{
  uint64_t count;	// total number of central directory records
  uint64_t size;	// size of the central directory, in bytes
  uint64_t offset;	// offset of the start of the central directory in the archive
};
73
74class ZipEntry
75{
76public:
77
78  ZipEntry ()
79  {
80    name = NULL;
81    data_offset = 0;
82  }
83
84  ~ZipEntry ()
85  {
86    free (name);
87  }
88
89  int
90  compare (ZipEntry *ze)
91  {
92    return dbe_strcmp (name, ze->name);
93  }
94
95  char *name;       // entry name
96  int time;         // modification time
97  int64_t size;     // size of uncompressed data
98  int64_t csize;    // size of compressed data (zero if uncompressed)
99  uint32_t compressionMethod;
100  int64_t offset;   // offset of LOC header
101  int64_t data_offset;
102};
103
104static int
105cmp_names (const void *a, const void *b)
106{
107  ZipEntry *e1 = *((ZipEntry **) a);
108  ZipEntry *e2 = *((ZipEntry **) b);
109  return e1->compare (e2);
110}
111
112template<> void Vector<ZipEntry *>::dump (const char *msg)
113{
114  Dprintf (1, NTXT ("Vector<ZipEntry *> %s  [%lld]\n"), msg ? msg : NTXT (""), (long long) size ());
115  for (long i = 0, sz = size (); i < sz; i++)
116    {
117      ZipEntry *ze = get (i);
118      Dprintf (1, NTXT ("  %lld offset:%lld (0x%llx) size: %lld --> %lld %s\n"),
119	       (long long) i, (long long) ze->offset, (long long) ze->offset,
120	       (long long) ze->csize, (long long) ze->size, STR (ze->name));
121    }
122}
123
124DbeJarFile::DbeJarFile (const char *jarName)
125{
126  name = strdup (jarName);
127  fnames = NULL;
128  dwin = new Data_window (name);
129  get_entries ();
130}
131
132DbeJarFile::~DbeJarFile ()
133{
134  free (name);
135  delete fnames;
136}
137
138void
139DbeJarFile::get_entries ()
140{
141  Dprintf (DUMP_JAR_FILE, NTXT ("\nArchive: %s\n"), STR (name));
142  if (dwin->not_opened ())
143    {
144      append_msg (CMSG_ERROR, GTXT ("Cannot open file `%s'"), name);
145      return;
146    }
147  struct EndCentDir endCentDir;
148  if (get_EndCentDir (&endCentDir) == 0)
149    return;
150
151  if (endCentDir.count == 0)
152    {
153      append_msg (CMSG_WARN, GTXT ("No files in %s"), name);
154      return;
155    }
156  unsigned char *b = (unsigned char *) dwin->bind (endCentDir.offset, endCentDir.size);
157  if (b == NULL)
158    {
159      append_msg (CMSG_ERROR, GTXT ("%s: cannot read the central directory record"), name);
160      return;
161    }
162
163  fnames = new Vector<ZipEntry*>(endCentDir.count);
164  for (uint64_t i = 0, offset = endCentDir.offset, last = endCentDir.offset + endCentDir.size; i < endCentDir.count; i++)
165    {
166      if ((last - offset) < CENT_FILE_HEADER_SIZE)
167	{
168	  append_msg (CMSG_ERROR, GTXT ("%s: cannot read the central file header (%lld (from %lld), offset=0x%016llx last=0x%016llx"),
169		      name, (long long) i, (long long) endCentDir.count, (long long) offset, (long long) last);
170	  break;
171	}
172      b = (unsigned char *) dwin->bind (offset, CENT_FILE_HEADER_SIZE);
173      //  Central file header
174      //  Offset Bytes    Description
175      //     0     4   central file header signature = 0x02014b50
176      //     4     2   version made by
177      //     6     2   version needed to extract
178      //     8     2   general purpose bit flag
179      //    10     2   compression method
180      //    12     2   last mod file time
181      //    14     2   last mod file date
182      //    16     4   crc-32
183      //    20     4   compressed size
184      //    24     4   uncompressed size
185      //    28     2   file name length
186      //    30     2   extra field length
187      //    32     2   file comment length
188      //    34     2   disk number start
189      //    36     2   internal file attributes
190      //    38     4   external file attributes
191      //    42     4   relative offset of local header
192      //    46         file name (variable size)
193      //               extra field (variable size)
194      //               file comment (variable size)
195      uint32_t signature = get_u4 (b);
196      if (signature != 0x02014b50)
197	{
198	  append_msg (CMSG_ERROR, GTXT ("%s: wrong header signature (%lld (total %lld), offset=0x%016llx last=0x%016llx"),
199		      name, (long long) i, (long long) endCentDir.count, (long long) offset, (long long) last);
200	  break;
201	}
202      ZipEntry *ze = new ZipEntry ();
203      fnames->append (ze);
204      uint32_t name_len = get_u2 (b + 28);
205      uint32_t extra_len = get_u2 (b + 30);
206      uint32_t comment_len = get_u2 (b + 32);
207      ze->compressionMethod = get_u2 (b + 10);
208      ze->csize = get_u4 (b + 20);
209      ze->size = get_u4 (b + 24);
210      ze->offset = get_u4 (b + 42);
211      char *nm = (char *) dwin->bind (offset + 46, name_len);
212      if (nm)
213	{
214	  ze->name = (char *) malloc (name_len + 1);
215	  strncpy (ze->name, nm, name_len);
216	  ze->name[name_len] = 0;
217	}
218      offset += CENT_FILE_HEADER_SIZE + name_len + extra_len + comment_len;
219    }
220  fnames->sort (cmp_names);
221  if (DUMP_JAR_FILE)
222    fnames->dump (get_basename (name));
223}
224
225int
226DbeJarFile::get_entry (const char *fname)
227{
228  if (fnames == NULL)
229    return -1;
230  ZipEntry zipEntry, *ze = &zipEntry;
231  ze->name = (char *) fname;
232  int ind = fnames->bisearch (0, -1, &ze, cmp_names);
233  ze->name = NULL;
234  return ind;
235}
236
237long long
238DbeJarFile::copy (char *toFileNname, int fromEntryNum)
239{
240  if (fromEntryNum < 0 || fromEntryNum >= VecSize (fnames))
241    return -1;
242  ZipEntry *ze = fnames->get (fromEntryNum);
243  if (ze->data_offset == 0)
244    {
245      //  Local file header
246      //  Offset Bytes    Description
247      //     0     4   local file header signature = 0x04034b50
248      //     4     2   version needed to extract
249      //     6     2   general purpose bit flag
250      //     8     2   compression method
251      //    10     2   last mod file time
252      //    12     2   last mod file date
253      //    14     4   crc-32
254      //    18     4   compressed size
255      //    22     4   uncompressed size
256      //    26     2   file name length
257      //    28     2   extra field length
258      //    30     2   file name (variable size)
259      //               extra field (variable size)
260      unsigned char *b = (unsigned char *) dwin->bind (ze->offset, LOC_FILE_HEADER_SIZE);
261      if (b == NULL)
262	{
263	  append_msg (CMSG_ERROR,
264		 GTXT ("%s: Cannot read a local file header (%s offset=0x%lld"),
265		 name, STR (ze->name), (long long) ze->offset);
266	  return -1;
267	}
268      uint32_t signature = get_u4 (b);
269      if (signature != 0x04034b50)
270	{
271	  append_msg (CMSG_ERROR,
272		      GTXT ("%s: wrong local header signature ('%s' offset=%lld (0x%llx)"),
273		      name, STR (ze->name), (long long) ze->offset,
274		      (long long) ze->offset);
275	  return -1;
276	}
277      ze->data_offset = ze->offset + LOC_FILE_HEADER_SIZE + get_u2 (b + 26) + get_u2 (b + 28);
278    }
279
280  if (ze->compressionMethod == 0)
281    {
282      int fd = open (toFileNname, O_CREAT | O_WRONLY | O_LARGEFILE, 0644);
283      if (fd == -1)
284	{
285	  append_msg (CMSG_ERROR, GTXT ("Cannot create file %s (%s)"), toFileNname, STR (strerror (errno)));
286	  return -1;
287	}
288      long long len = dwin->copy_to_file (fd, ze->data_offset, ze->size);
289      close (fd);
290      if (len != ze->size)
291	{
292	  append_msg (CMSG_ERROR, GTXT ("%s: Cannot write %lld bytes (only %lld)"),
293		      toFileNname, (long long) ze->size, (long long) len);
294	  unlink (toFileNname);
295	  return -1;
296	}
297      return len;
298    }
299
300  unsigned char *b = (unsigned char *) dwin->bind (ze->data_offset, ze->csize);
301  if (b == NULL)
302    {
303      append_msg (CMSG_ERROR,
304		  GTXT ("%s: Cannot extract file %s (offset=0x%lld csize=%lld)"),
305		  name, STR (ze->name), (long long) ze->offset,
306		  (long long) ze->csize);
307      return -1;
308    }
309  z_stream strm;
310  strm.zalloc = Z_NULL;
311  strm.zfree = Z_NULL;
312  strm.opaque = Z_NULL;
313  strm.next_in = Z_NULL;
314  strm.avail_in = 0;
315  if (inflateInit2 (&strm, -MAX_WBITS) != Z_OK)
316    {
317      append_msg (CMSG_ERROR, GTXT ("%s: inflateInit2 failed (%s)"), STR (ze->name), STR (strm.msg));
318      return -1;
319    }
320  strm.avail_in = ze->csize;
321  strm.next_in = b;
322  int retval = ze->size;
323  unsigned char *buf = (unsigned char *) malloc (ze->size);
324  for (;;)
325    {
326      strm.next_out = buf;
327      strm.avail_out = ze->size;
328      int ret = inflate (&strm, Z_SYNC_FLUSH);
329      if ((ret == Z_NEED_DICT) || (ret == Z_DATA_ERROR) || (ret == Z_MEM_ERROR) || (ret == Z_STREAM_ERROR))
330	{
331	  append_msg (CMSG_ERROR, GTXT ("%s: inflate('%s') error %d (%s)"), name, STR (ze->name), ret, STR (strm.msg));
332	  retval = -1;
333	  break;
334	}
335      if (strm.avail_out != 0)
336	break;
337    }
338  inflateEnd (&strm);
339  if (retval != -1)
340    {
341      int fd = open (toFileNname, O_CREAT | O_WRONLY | O_LARGEFILE, 0644);
342      if (fd == -1)
343	{
344	  append_msg (CMSG_ERROR, GTXT ("Cannot create file %s (%s)"), toFileNname, STR (strerror (errno)));
345	  retval = -1;
346	}
347      else
348	{
349	  long long len = write (fd, buf, ze->size);
350	  if (len != ze->size)
351	    {
352	      append_msg (CMSG_ERROR, GTXT ("%s: Cannot write %lld bytes (only %lld)"),
353			  toFileNname, (long long) strm.avail_out, (long long) len);
354	      retval = -1;
355	    }
356	  close (fd);
357	}
358    }
359  free (buf);
360  return retval;
361}
362
363int
364DbeJarFile::get_EndCentDir (struct EndCentDir *endCentDir)
365{
366  int64_t fsize = dwin->get_fsize ();
367  int64_t sz = (fsize < ZIP_BUF_SIZE) ? fsize : ZIP_BUF_SIZE;
368
369  // Find the end of central directory record:
370  unsigned char *b = (unsigned char *) dwin->bind (fsize - sz, sz);
371  if (b == NULL)
372    {
373      append_msg (CMSG_ERROR, GTXT ("%s: cannot find the central directory record (fsize=%lld)"),
374		  name, (long long) fsize);
375      return 0;
376    }
377
378  //  End of central directory record:
379  //  Offset Bytes    Description
380  //     0     4    end of central directory signature = 0x06054b50
381  //     4     2    number of this disk
382  //     6     2    disk where central directory starts
383  //     8     2    number of central directory records on this disk
384  //    10     2    total number of central directory records
385  //    12     4    size of central directory(bytes)
386  //    16     4    offset of start of central directory, relative to start of archive
387  //    20     2    comment length(n)
388  //    22     n    comment
389
390  endCentDir->count = 0;
391  endCentDir->size = 0;
392  endCentDir->offset = 0;
393  int64_t ecdrOffset = fsize;
394  for (int64_t i = END_CENT_DIR_SIZE; i < sz; i++)
395    {
396      b = (unsigned char *) dwin->bind (fsize - i, END_CENT_DIR_SIZE);
397      if (b == NULL)
398	{
399	  append_msg (CMSG_ERROR, GTXT ("%s: read failed (offset:0x%llx  bytes:%lld"),
400		      name, (long long) (fsize - i), (long long) END_CENT_DIR_SIZE);
401	  break;
402	}
403      uint32_t signature = get_u4 (b);
404      if (signature == 0x06054b50)
405	{
406	  int64_t len_comment = get_u2 (b + 20);
407	  if (i != (len_comment + END_CENT_DIR_SIZE))
408	    continue;
409	  ecdrOffset = fsize - i;
410	  endCentDir->count = get_u2 (b + 10);
411	  endCentDir->size = get_u4 (b + 12);
412	  endCentDir->offset = get_u4 (b + 16);
413	  Dprintf (DUMP_JAR_FILE,
414		   "  Zip archive file size:              %10lld (0x%016llx)\n"
415		   "  end-cent-dir record offset:         %10lld (0x%016llx)\n"
416		   "  cent-dir offset:                    %10lld (0x%016llx)\n"
417		   "  cent-dir size:                      %10lld (0x%016llx)\n"
418		   "  cent-dir entries:                   %10lld\n",
419		   (long long) fsize, (long long) fsize,
420		   (long long) ecdrOffset, (long long) ecdrOffset,
421		   (long long) endCentDir->offset, (long long) endCentDir->offset,
422		   (long long) endCentDir->size, (long long) endCentDir->size,
423		   (long long) endCentDir->count);
424	  break;
425	}
426    }
427  if (ecdrOffset == fsize)
428    {
429      append_msg (CMSG_ERROR,
430		  GTXT ("%s: cannot find the central directory record"), name);
431      return 0;
432    }
433  if (endCentDir->count == 0xffff || endCentDir->offset == 0xffffffff
434      || endCentDir->size == 0xffffffff)
435    {
436      // Zip64 format:
437      //      Zip64 end of central directory record
438      //      Zip64 end of central directory locator  ( Can be absent )
439      //      End of central directory record
440      b = (unsigned char *) dwin->bind (ecdrOffset - ZIP64_LOCATOR_SIZE,
441					ZIP64_LOCATOR_SIZE);
442      if (b == NULL)
443	{
444	  append_msg (CMSG_ERROR,
445	     GTXT ("%s: cannot find the Zip64 central directory record"), name);
446	  return 0;
447	}
448      uint32_t signature = get_u4 (b);
449      if (signature == 0x07064b50)
450	{ // Get an offset from the Zip64 cent-dir locator
451	  //  Zip64 end of central directory locator
452	  //  Offset Bytes    Description
453	  //     0     4    Zip64 end of central dir locator signature = 0x07064b50
454	  //     4     4    number of the disk with the start of the zip64 end of central directory
455	  //     8     8    relative offset of the Zip64 end of central directory record
456	  //    12     4    total number of disks
457	  Dprintf (DUMP_JAR_FILE, "    cent-dir locator offset           %10lld (0x%016llx)\n",
458		   (long long) (ecdrOffset - ZIP64_LOCATOR_SIZE), (long long) (ecdrOffset - ZIP64_LOCATOR_SIZE));
459	  ecdrOffset = get_u8 (b + 8);
460	}
461      else   // the Zip64 end of central directory locator is absent
462	ecdrOffset -= ZIP64_CENT_DIR_SIZE;
463      Dprintf (DUMP_JAR_FILE, NTXT ("  Zip64 end-cent-dir record offset:   %10lld (0x%016llx)\n"),
464	       (long long) ecdrOffset, (long long) ecdrOffset);
465
466      b = (unsigned char *) dwin->bind (ecdrOffset, ZIP64_CENT_DIR_SIZE);
467      if (b == NULL)
468	{
469	  append_msg (CMSG_ERROR,
470	     GTXT ("%s: cannot find the Zip64 central directory record"), name);
471	  return 0;
472	}
473      //  Zip64 end of central directory record
474      //  Offset Bytes    Description
475      //     0     4    Zip64 end of central dir signature = 0x06064b50
476      //     4     8    size of zip64 end of central directory record
477      //    12     2    version made by
478      //    14     2    version needed to extract
479      //    16     4    number of this disk
480      //    20     4    number of the disk with the start of the central directory
481      //    24     8    total number of entries in the central directory on this disk
482      //    32     8    total number of entries in the central directory
483      //    40     8    size of the central directory
484      //    48     8    offset of start of centraldirectory with respect to the starting disk number
485      //    56          Zip64 extensible data sector (variable size)
486      signature = get_u4 (b);
487      if (signature != 0x06064b50)
488	{
489	  append_msg (CMSG_ERROR, GTXT ("%s: cannot find the Zip64 central directory record"), name);
490	  return 0;
491	}
492      endCentDir->count = get_u8 (b + 32);
493      endCentDir->size = get_u8 (b + 40);
494      endCentDir->offset = get_u8 (b + 48);
495      Dprintf (DUMP_JAR_FILE,
496	       NTXT ("  cent-dir offset:                    %10lld (0x%016llx)\n"
497		     "  cent-dir size:                      %10lld (0x%016llx)\n"
498		     "  cent-dir entries:                   %10lld\n"),
499	       (long long) endCentDir->offset, (long long) endCentDir->offset,
500	       (long long) endCentDir->size, (long long) endCentDir->size,
501	       (long long) endCentDir->count);
502    }
503  return 1;
504}
505
506