1/*
2 * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 *   - Redistributions of source code must retain the above copyright
9 *     notice, this list of conditions and the following disclaimer.
10 *
11 *   - Redistributions in binary form must reproduce the above copyright
12 *     notice, this list of conditions and the following disclaimer in the
13 *     documentation and/or other materials provided with the distribution.
14 *
15 *   - Neither the name of Oracle nor the names of its
16 *     contributors may be used to endorse or promote products derived
17 *     from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
20 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#ifndef LIBJIMAGE_IMAGEDECOMPRESSOR_HPP
33#define LIBJIMAGE_IMAGEDECOMPRESSOR_HPP
34
35#include <assert.h>
36#include <string.h>
37
38#include "imageFile.hpp"
39#include "inttypes.hpp"
40#include "jni.h"
41
/*
 * Compressed resources located in the image have a header.
 * This header contains:
 * - _magic: A magic u4, required to retrieve the header in the compressed content.
 * - _size: The size of the compressed resource.
 * - _uncompressed_size: The uncompressed size of the compressed resource.
 * - _decompressor_name_offset: The ImageDecompressor instance name StringsTable offset.
 * - _decompressor_config_offset: StringsTable offset of configuration that could be needed by
 *   the decompressor in order to decompress.
 * - _is_terminal: 1: the compressed content is terminal. Uncompressing it would
 *   create the actual resource. 0: the compressed content is not terminal. Uncompressing it
 *   will result in a compressed content to be decompressed (this occurs when a stack of
 *   compressors has been used to compress the resource).
 */
56struct ResourceHeader {
57    /* magic bytes that identifies a compressed resource header*/
58    static const u4 resource_header_magic = 0xCAFEFAFA;
59    u4 _magic; // Resource header
60    u8 _size;    // Resource size
61    u8 _uncompressed_size;  // Expected uncompressed size
62    u4 _decompressor_name_offset;    // Strings table decompressor offset
63    u4 _decompressor_config_offset; // Strings table config offset
64    u1 _is_terminal; // Last decompressor 1, otherwise 0.
65};
66
/*
 * Resources located in a jimage file can be compressed. Compression occurs at
 * jimage file creation time. When a resource is compressed, a header that
 * contains the name of the compressor that compressed it is added to it.
 * Various compression strategies can be applied to compress a resource.
 * The same resource can even be compressed multiple times by a stack of compressors.
 * At runtime, a resource is decompressed in a loop until there is no more header,
 * meaning that the resource is equivalent to the uncompressed resource.
 * In each iteration, the name of the compressor located in the current header
 * is used to retrieve the associated instance of ImageDecompressor.
 * For example "zip" is the name of the compressor that compresses resources
 * using the zip algorithm. The ZipDecompressor class name is also "zip".
 * ImageDecompressor instances are retrieved from a static array in which
 * they are registered.
 */
82class ImageDecompressor {
83
84private:
85    const char* _name;
86
87    /*
88     * Array of concrete decompressors. This array is used to retrieve the decompressor
89     * that can handle resource decompression.
90     */
91    static ImageDecompressor** _decompressors;
92    /**
93     * Num of decompressors
94     */
95    static int _decompressors_num;
96    /*
97     * Identifier of a decompressor. This name is the identification key to retrieve
98     * decompressor from a resource header.
99     */
100    inline const char* get_name() const { return _name; }
101
102    static u8 getU8(u1* ptr, Endian *endian);
103    static u4 getU4(u1* ptr, Endian *endian);
104
105protected:
106    ImageDecompressor(const char* name) : _name(name) {
107    }
108    virtual void decompress_resource(u1* data, u1* uncompressed,
109        ResourceHeader* header, const ImageStrings* strings) = 0;
110
111public:
112    static void image_decompressor_init();
113    static void image_decompressor_close();
114    static ImageDecompressor* get_decompressor(const char * decompressor_name) ;
115    static void decompress_resource(u1* compressed, u1* uncompressed,
116        u8 uncompressed_size, const ImageStrings* strings, Endian* _endian);
117};
118
119/**
120 * Zip decompressor.
121 */
122class ZipDecompressor : public ImageDecompressor {
123public:
124    ZipDecompressor(const char* sym) : ImageDecompressor(sym) { }
125    void decompress_resource(u1* data, u1* uncompressed, ResourceHeader* header,
126        const ImageStrings* strings);
127    static jboolean decompress(void *in, u8 inSize, void *out, u8 outSize, char **pmsg);
128};
129
/*
 * Shared Strings decompressor. This decompressor reconstructs the class
 * constant pool UTF_8 entries by retrieving strings stored in the jimage strings table.
 * In addition, if the UTF_8 entry is a descriptor, the descriptor has to be rebuilt,
 * all Java types having been removed from the descriptor and added to the strings table.
 * e.g.: "(Ljava/lang/String;I)V" ==> "(L;I)V" and "java/lang", "String"
 * stored in the strings table. Offsets to the 2 strings are compressed and stored in the
 * constant pool entry.
 */
139class SharedStringDecompressor : public ImageDecompressor {
140private:
141    // the constant pool tag for UTF8 string located in strings table
142    static const int externalized_string = 23;
143    // the constant pool tag for UTF8 descriptors string located in strings table
144    static const int externalized_string_descriptor = 25;
145    // the constant pool tag for UTF8
146    static const int constant_utf8 = 1;
147    // the constant pool tag for long
148    static const int constant_long = 5;
149    // the constant pool tag for double
150    static const int constant_double = 6;
151    // array index is the constant pool tag. value is size.
152    // eg: array[5]  = 8; means size of long is 8 bytes.
153    static const u1 sizes[];
154    // bit 5 and 6 are used to store the length of the compressed integer.
155    // size can be 1 (01), 2 (10), 3 (11).
156    // 0x60 ==> 0110000
157    static const int compressed_index_size_mask = 0x60;
158    /*
159     * mask the length bits (5 and 6) and move to the right 5 bits.
160     */
161    inline static int get_compressed_length(char c) {
162        return ((char) (c & compressed_index_size_mask) >> 5);
163    }
164    inline static bool is_compressed(signed char b1) { return b1 < 0; }
165    static int decompress_int(unsigned char*& value);
166public:
167    SharedStringDecompressor(const char* sym) : ImageDecompressor(sym){}
168    void decompress_resource(u1* data, u1* uncompressed, ResourceHeader* header,
169    const ImageStrings* strings);
170};
171#endif // LIBJIMAGE_IMAGEDECOMPRESSOR_HPP
172