imageFile.cpp revision 13901:b2a69d66dc65
1/*
2 * Copyright (c) 2015, 2016 Oracle and/or its affiliates. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 *   - Redistributions of source code must retain the above copyright
9 *     notice, this list of conditions and the following disclaimer.
10 *
11 *   - Redistributions in binary form must reproduce the above copyright
12 *     notice, this list of conditions and the following disclaimer in the
13 *     documentation and/or other materials provided with the distribution.
14 *
15 *   - Neither the name of Oracle nor the names of its
16 *     contributors may be used to endorse or promote products derived
17 *     from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
20 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#include <assert.h>
33#include <string.h>
34#include <stdlib.h>
35
36#include "endian.hpp"
37#include "imageDecompressor.hpp"
38#include "imageFile.hpp"
39#include "inttypes.hpp"
40#include "jni.h"
41#include "osSupport.hpp"
42
43// Map the full jimage, only with 64 bit addressing.
44bool ImageFileReader::memory_map_image = sizeof(void *) == 8;
45
// Path component separator of the host platform.
#if defined(WIN32)
const char FileSeparator = '\\';
#else
const char FileSeparator = '/';
#endif
51
// Image files are an alternate file format for storing classes and resources. The
// goal is to supply file access which is faster and smaller than the jar format.
//
// (More detailed notes in the header.)
//
57
58// Compute the Perfect Hashing hash code for the supplied UTF-8 string.
59s4 ImageStrings::hash_code(const char* string, s4 seed) {
60    // Access bytes as unsigned.
61    u1* bytes = (u1*)string;
62    // Compute hash code.
63    for (u1 byte = *bytes++; byte; byte = *bytes++) {
64        seed = (seed * HASH_MULTIPLIER) ^ byte;
65    }
66    // Ensure the result is not signed.
67    return seed & 0x7FFFFFFF;
68}
69
70// Match up a string in a perfect hash table.
71// Returns the index where the name should be.
72// Result still needs validation for precise match (false positive.)
73s4 ImageStrings::find(Endian* endian, const char* name, s4* redirect, u4 length) {
74    // If the table is empty, then short cut.
75    if (!redirect || !length) {
76        return NOT_FOUND;
77    }
78    // Compute the basic perfect hash for name.
79    s4 hash_code = ImageStrings::hash_code(name);
80    // Modulo table size.
81    s4 index = hash_code % length;
82    // Get redirect entry.
83    //   value == 0 then not found
84    //   value < 0 then -1 - value is true index
85    //   value > 0 then value is seed for recomputing hash.
86    s4 value = endian->get(redirect[index]);
87    // if recompute is required.
88    if (value > 0 ) {
89        // Entry collision value, need to recompute hash.
90        hash_code = ImageStrings::hash_code(name, value);
91        // Modulo table size.
92        return hash_code % length;
93    } else if (value < 0) {
94        // Compute direct index.
95        return -1 - value;
96    }
97    // No entry found.
98    return NOT_FOUND;
99}
100
101// Test to see if UTF-8 string begins with the start UTF-8 string.  If so,
102// return non-NULL address of remaining portion of string.  Otherwise, return
103// NULL.    Used to test sections of a path without copying from image string
104// table.
105const char* ImageStrings::starts_with(const char* string, const char* start) {
106    char ch1, ch2;
107    // Match up the strings the best we can.
108    while ((ch1 = *string) && (ch2 = *start)) {
109        if (ch1 != ch2) {
110            // Mismatch, return NULL.
111            return NULL;
112        }
113        // Next characters.
114        string++, start++;
115    }
116    // Return remainder of string.
117    return string;
118}
119
120// Inflates the attribute stream into individual values stored in the long
121// array _attributes. This allows an attribute value to be quickly accessed by
122// direct indexing.  Unspecified values default to zero (from constructor.)
123void ImageLocation::set_data(u1* data) {
124    // Deflate the attribute stream into an array of attributes.
125    u1 byte;
126    // Repeat until end header is found.
127    while ((data != NULL) && (byte = *data)) {
128        // Extract kind from header byte.
129        u1 kind = attribute_kind(byte);
130        assert(kind < ATTRIBUTE_COUNT && "invalid image location attribute");
131        // Extract length of data (in bytes).
132        u1 n = attribute_length(byte);
133        // Read value (most significant first.)
134        _attributes[kind] = attribute_value(data + 1, n);
135        // Position to next attribute by skipping attribute header and data bytes.
136        data += n + 1;
137    }
138}
139
140// Zero all attribute values.
141void ImageLocation::clear_data() {
142    // Set defaults to zero.
143    memset(_attributes, 0, sizeof(_attributes));
144}
145
146// ImageModuleData constructor maps out sub-tables for faster access.
147ImageModuleData::ImageModuleData(const ImageFileReader* image_file) :
148        _image_file(image_file),
149        _endian(image_file->endian()) {
150}
151
152// Release module data resource.
153ImageModuleData::~ImageModuleData() {
154}
155
156
157// Return the module in which a package resides.    Returns NULL if not found.
158const char* ImageModuleData::package_to_module(const char* package_name) {
159    // replace all '/' by '.'
160    char* replaced = new char[(int) strlen(package_name) + 1];
161    assert(replaced != NULL && "allocation failed");
162    int i;
163    for (i = 0; package_name[i] != '\0'; i++) {
164      replaced[i] = package_name[i] == '/' ? '.' : package_name[i];
165    }
166    replaced[i] = '\0';
167
168    // build path /packages/<package_name>
169    const char* radical = "/packages/";
170    char* path = new char[(int) strlen(radical) + (int) strlen(package_name) + 1];
171    assert(path != NULL && "allocation failed");
172    strcpy(path, radical);
173    strcat(path, replaced);
174    delete[] replaced;
175
176    // retrieve package location
177    ImageLocation location;
178    bool found = _image_file->find_location(path, location);
179    if (!found) {
180        delete[] path;
181        return NULL;
182    }
183
184    // retrieve offsets to module name
185    int size = (int)location.get_attribute(ImageLocation::ATTRIBUTE_UNCOMPRESSED);
186    u1* content = new u1[size];
187    assert(content != NULL && "allocation failed");
188    _image_file->get_resource(location, content);
189    u1* ptr = content;
190    // sequence of sizeof(8) isEmpty|offset. Use the first module that is not empty.
191    u4 offset = 0;
192    for (i = 0; i < size; i+=8) {
193        u4 isEmpty = _endian->get(*((u4*)ptr));
194        ptr += 4;
195        if (!isEmpty) {
196            offset = _endian->get(*((u4*)ptr));
197            break;
198        }
199        ptr += 4;
200    }
201    delete[] content;
202    return _image_file->get_strings().get(offset);
203}
204
205// Manage a table of open image files.  This table allows multiple access points
206// to share an open image.
207ImageFileReaderTable::ImageFileReaderTable() : _count(0), _max(_growth) {
208    _table = new ImageFileReader*[_max];
209    assert(_table != NULL && "allocation failed");
210}
211
212ImageFileReaderTable::~ImageFileReaderTable() {
213    delete[] _table;
214}
215
216// Add a new image entry to the table.
217void ImageFileReaderTable::add(ImageFileReader* image) {
218    if (_count == _max) {
219        _max += _growth;
220        _table = static_cast<ImageFileReader**>(realloc(_table, _max * sizeof(ImageFileReader*)));
221    }
222    _table[_count++] = image;
223}
224
225// Remove an image entry from the table.
226void ImageFileReaderTable::remove(ImageFileReader* image) {
227    for (u4 i = 0; i < _count; i++) {
228        if (_table[i] == image) {
229            // Swap the last element into the found slot
230            _table[i] = _table[--_count];
231            break;
232        }
233    }
234
235    if (_count != 0 && _count == _max - _growth) {
236        _max -= _growth;
237        _table = static_cast<ImageFileReader**>(realloc(_table, _max * sizeof(ImageFileReader*)));
238    }
239}
240
241// Determine if image entry is in table.
242bool ImageFileReaderTable::contains(ImageFileReader* image) {
243    for (u4 i = 0; i < _count; i++) {
244        if (_table[i] == image) {
245            return true;
246        }
247    }
248    return false;
249}
250
251// Table to manage multiple opens of an image file.
252ImageFileReaderTable ImageFileReader::_reader_table;
253
254SimpleCriticalSection _reader_table_lock;
255
256// Locate an image if file already open.
257ImageFileReader* ImageFileReader::find_image(const char* name) {
258    // Lock out _reader_table.
259    SimpleCriticalSectionLock cs(&_reader_table_lock);
260    // Search for an exist image file.
261    for (u4 i = 0; i < _reader_table.count(); i++) {
262        // Retrieve table entry.
263        ImageFileReader* reader = _reader_table.get(i);
264        // If name matches, then reuse (bump up use count.)
265        assert(reader->name() != NULL && "reader->name must not be null");
266        if (strcmp(reader->name(), name) == 0) {
267            reader->inc_use();
268            return reader;
269        }
270    }
271
272    return NULL;
273}
274
275// Open an image file, reuse structure if file already open.
276ImageFileReader* ImageFileReader::open(const char* name, bool big_endian) {
277    ImageFileReader* reader = find_image(name);
278    if (reader != NULL) {
279        return reader;
280    }
281
282    // Need a new image reader.
283    reader = new ImageFileReader(name, big_endian);
284    if (reader == NULL || !reader->open()) {
285        // Failed to open.
286        delete reader;
287        return NULL;
288    }
289
290    // Lock to update
291    SimpleCriticalSectionLock cs(&_reader_table_lock);
292    // Search for an existing image file.
293    for (u4 i = 0; i < _reader_table.count(); i++) {
294        // Retrieve table entry.
295        ImageFileReader* existing_reader = _reader_table.get(i);
296        // If name matches, then reuse (bump up use count.)
297        assert(reader->name() != NULL && "reader->name still must not be null");
298        if (strcmp(existing_reader->name(), name) == 0) {
299            existing_reader->inc_use();
300            reader->close();
301            delete reader;
302            return existing_reader;
303        }
304    }
305    // Bump use count and add to table.
306    reader->inc_use();
307    _reader_table.add(reader);
308    return reader;
309}
310
311// Close an image file if the file is not in use elsewhere.
312void ImageFileReader::close(ImageFileReader *reader) {
313    // Lock out _reader_table.
314    SimpleCriticalSectionLock cs(&_reader_table_lock);
315    // If last use then remove from table and then close.
316    if (reader->dec_use()) {
317        _reader_table.remove(reader);
318        delete reader;
319    }
320}
321
322// Return an id for the specifed ImageFileReader.
323u8 ImageFileReader::reader_to_ID(ImageFileReader *reader) {
324    // ID is just the cloaked reader address.
325    return (u8)reader;
326}
327
328// Validate the image id.
329bool ImageFileReader::id_check(u8 id) {
330    // Make sure the ID is a managed (_reader_table) reader.
331    SimpleCriticalSectionLock cs(&_reader_table_lock);
332    return _reader_table.contains((ImageFileReader*)id);
333}
334
335// Return an id for the specifed ImageFileReader.
336ImageFileReader* ImageFileReader::id_to_reader(u8 id) {
337    assert(id_check(id) && "invalid image id");
338    return (ImageFileReader*)id;
339}
340
341// Constructor intializes to a closed state.
342ImageFileReader::ImageFileReader(const char* name, bool big_endian) {
343    // Copy the image file name.
344     int len = (int) strlen(name) + 1;
345    _name = new char[len];
346    assert(_name != NULL  && "allocation failed");
347    strncpy(_name, name, len);
348    // Initialize for a closed file.
349    _fd = -1;
350    _endian = Endian::get_handler(big_endian);
351    _index_data = NULL;
352}
353
354// Close image and free up data structures.
355ImageFileReader::~ImageFileReader() {
356    // Ensure file is closed.
357    close();
358    // Free up name.
359    if (_name) {
360        delete[] _name;
361        _name = NULL;
362    }
363}
364
365// Open image file for read access.
366bool ImageFileReader::open() {
367    // If file exists open for reading.
368    _fd = osSupport::openReadOnly(_name);
369    if (_fd == -1) {
370        return false;
371    }
372    // Retrieve the file size.
373    _file_size = osSupport::size(_name);
374    // Read image file header and verify it has a valid header.
375    size_t header_size = sizeof(ImageHeader);
376    if (_file_size < header_size ||
377        !read_at((u1*)&_header, header_size, 0) ||
378        _header.magic(_endian) != IMAGE_MAGIC ||
379        _header.major_version(_endian) != MAJOR_VERSION ||
380        _header.minor_version(_endian) != MINOR_VERSION) {
381        close();
382        return false;
383    }
384    // Size of image index.
385    _index_size = index_size();
386    // Make sure file is large enough to contain the index.
387    if (_file_size < _index_size) {
388        return false;
389    }
390    // Memory map image (minimally the index.)
391    _index_data = (u1*)osSupport::map_memory(_fd, _name, 0, (size_t)map_size());
392    assert(_index_data && "image file not memory mapped");
393    // Retrieve length of index perfect hash table.
394    u4 length = table_length();
395    // Compute offset of the perfect hash table redirect table.
396    u4 redirect_table_offset = (u4)header_size;
397    // Compute offset of index attribute offsets.
398    u4 offsets_table_offset = redirect_table_offset + length * (u4)sizeof(s4);
399    // Compute offset of index location attribute data.
400    u4 location_bytes_offset = offsets_table_offset + length * (u4)sizeof(u4);
401    // Compute offset of index string table.
402    u4 string_bytes_offset = location_bytes_offset + locations_size();
403    // Compute address of the perfect hash table redirect table.
404    _redirect_table = (s4*)(_index_data + redirect_table_offset);
405    // Compute address of index attribute offsets.
406    _offsets_table = (u4*)(_index_data + offsets_table_offset);
407    // Compute address of index location attribute data.
408    _location_bytes = _index_data + location_bytes_offset;
409    // Compute address of index string table.
410    _string_bytes = _index_data + string_bytes_offset;
411
412    // Initialize the module data
413    module_data = new ImageModuleData(this);
414    // Successful open (if memory allocation succeeded).
415    return module_data != NULL;
416}
417
418// Close image file.
419void ImageFileReader::close() {
420    // Deallocate the index.
421    if (_index_data) {
422        osSupport::unmap_memory((char*)_index_data, _index_size);
423        _index_data = NULL;
424    }
425    // Close file.
426    if (_fd != -1) {
427        osSupport::close(_fd);
428        _fd = -1;
429    }
430}
431
432// Read directly from the file.
433bool ImageFileReader::read_at(u1* data, u8 size, u8 offset) const {
434    return (u8)osSupport::read(_fd, (char*)data, size, offset) == size;
435}
436
437// Find the location attributes associated with the path.    Returns true if
438// the location is found, false otherwise.
439bool ImageFileReader::find_location(const char* path, ImageLocation& location) const {
440    // Locate the entry in the index perfect hash table.
441    s4 index = ImageStrings::find(_endian, path, _redirect_table, table_length());
442    // If is found.
443    if (index != ImageStrings::NOT_FOUND) {
444        // Get address of first byte of location attribute stream.
445        u1* data = get_location_data(index);
446        // Expand location attributes.
447        location.set_data(data);
448        // Make sure result is not a false positive.
449        return verify_location(location, path);
450    }
451    return false;
452}
453
454// Find the location index and size associated with the path.
455// Returns the location index and size if the location is found, 0 otherwise.
456u4 ImageFileReader::find_location_index(const char* path, u8 *size) const {
457    // Locate the entry in the index perfect hash table.
458    s4 index = ImageStrings::find(_endian, path, _redirect_table, table_length());
459    // If found.
460    if (index != ImageStrings::NOT_FOUND) {
461        // Get address of first byte of location attribute stream.
462        u4 offset = get_location_offset(index);
463        u1* data = get_location_offset_data(offset);
464        // Expand location attributes.
465        ImageLocation location(data);
466        // Make sure result is not a false positive.
467        if (verify_location(location, path)) {
468                *size = (jlong)location.get_attribute(ImageLocation::ATTRIBUTE_UNCOMPRESSED);
469                return offset;
470        }
471    }
472    return 0;            // not found
473}
474
475// Assemble the location path from the string fragments indicated in the location attributes.
476void ImageFileReader::location_path(ImageLocation& location, char* path, size_t max) const {
477    // Manage the image string table.
478    ImageStrings strings(_string_bytes, _header.strings_size(_endian));
479    // Position to first character of the path buffer.
480    char* next = path;
481    // Temp for string length.
482    size_t length;
483    // Get module string.
484    const char* module = location.get_attribute(ImageLocation::ATTRIBUTE_MODULE, strings);
485    // If module string is not empty string.
486    if (*module != '\0') {
487        // Get length of module name.
488        length = strlen(module);
489        // Make sure there is no buffer overflow.
490        assert(next - path + length + 2 < max && "buffer overflow");
491        // Append '/module/'.
492        *next++ = '/';
493        strncpy(next, module, length); next += length;
494        *next++ = '/';
495    }
496    // Get parent (package) string.
497    const char* parent = location.get_attribute(ImageLocation::ATTRIBUTE_PARENT, strings);
498    // If parent string is not empty string.
499    if (*parent != '\0') {
500        // Get length of module string.
501        length = strlen(parent);
502        // Make sure there is no buffer overflow.
503        assert(next - path + length + 1 < max && "buffer overflow");
504        // Append 'patent/' .
505        strncpy(next, parent, length); next += length;
506        *next++ = '/';
507    }
508    // Get base name string.
509    const char* base = location.get_attribute(ImageLocation::ATTRIBUTE_BASE, strings);
510    // Get length of base name.
511    length = strlen(base);
512    // Make sure there is no buffer overflow.
513    assert(next - path + length < max && "buffer overflow");
514    // Append base name.
515    strncpy(next, base, length); next += length;
516    // Get extension string.
517    const char* extension = location.get_attribute(ImageLocation::ATTRIBUTE_EXTENSION, strings);
518    // If extension string is not empty string.
519    if (*extension != '\0') {
520        // Get length of extension string.
521        length = strlen(extension);
522        // Make sure there is no buffer overflow.
523        assert(next - path + length + 1 < max && "buffer overflow");
524        // Append '.extension' .
525        *next++ = '.';
526        strncpy(next, extension, length); next += length;
527    }
528    // Make sure there is no buffer overflow.
529    assert((size_t)(next - path) < max && "buffer overflow");
530    // Terminate string.
531    *next = '\0';
532}
533
534// Verify that a found location matches the supplied path (without copying.)
535bool ImageFileReader::verify_location(ImageLocation& location, const char* path) const {
536    // Manage the image string table.
537    ImageStrings strings(_string_bytes, _header.strings_size(_endian));
538    // Position to first character of the path string.
539    const char* next = path;
540    // Get module name string.
541    const char* module = location.get_attribute(ImageLocation::ATTRIBUTE_MODULE, strings);
542    // If module string is not empty.
543    if (*module != '\0') {
544        // Compare '/module/' .
545        if (*next++ != '/') return false;
546        if (!(next = ImageStrings::starts_with(next, module))) return false;
547        if (*next++ != '/') return false;
548    }
549    // Get parent (package) string
550    const char* parent = location.get_attribute(ImageLocation::ATTRIBUTE_PARENT, strings);
551    // If parent string is not empty string.
552    if (*parent != '\0') {
553        // Compare 'parent/' .
554        if (!(next = ImageStrings::starts_with(next, parent))) return false;
555        if (*next++ != '/') return false;
556    }
557    // Get base name string.
558    const char* base = location.get_attribute(ImageLocation::ATTRIBUTE_BASE, strings);
559    // Compare with basne name.
560    if (!(next = ImageStrings::starts_with(next, base))) return false;
561    // Get extension string.
562    const char* extension = location.get_attribute(ImageLocation::ATTRIBUTE_EXTENSION, strings);
563    // If extension is not empty.
564    if (*extension != '\0') {
565        // Compare '.extension' .
566        if (*next++ != '.') return false;
567        if (!(next = ImageStrings::starts_with(next, extension))) return false;
568    }
569    // True only if complete match and no more characters.
570    return *next == '\0';
571}
572
573// Return the resource for the supplied location offset.
574void ImageFileReader::get_resource(u4 offset, u1* uncompressed_data) const {
575        // Get address of first byte of location attribute stream.
576        u1* data = get_location_offset_data(offset);
577        // Expand location attributes.
578        ImageLocation location(data);
579        // Read the data
580        get_resource(location, uncompressed_data);
581}
582
583// Return the resource for the supplied location.
584void ImageFileReader::get_resource(ImageLocation& location, u1* uncompressed_data) const {
585    // Retrieve the byte offset and size of the resource.
586    u8 offset = location.get_attribute(ImageLocation::ATTRIBUTE_OFFSET);
587    u8 uncompressed_size = location.get_attribute(ImageLocation::ATTRIBUTE_UNCOMPRESSED);
588    u8 compressed_size = location.get_attribute(ImageLocation::ATTRIBUTE_COMPRESSED);
589    // If the resource is compressed.
590    if (compressed_size != 0) {
591        u1* compressed_data;
592        // If not memory mapped read in bytes.
593        if (!memory_map_image) {
594            // Allocate buffer for compression.
595            compressed_data = new u1[(size_t)compressed_size];
596            assert(compressed_data != NULL && "allocation failed");
597            // Read bytes from offset beyond the image index.
598            bool is_read = read_at(compressed_data, compressed_size, _index_size + offset);
599            assert(is_read && "error reading from image or short read");
600        } else {
601            compressed_data = get_data_address() + offset;
602        }
603        // Get image string table.
604        const ImageStrings strings = get_strings();
605        // Decompress resource.
606        ImageDecompressor::decompress_resource(compressed_data, uncompressed_data, uncompressed_size,
607                        &strings, _endian);
608        // If not memory mapped then release temporary buffer.
609        if (!memory_map_image) {
610                delete[] compressed_data;
611        }
612    } else {
613        // Read bytes from offset beyond the image index.
614        bool is_read = read_at(uncompressed_data, uncompressed_size, _index_size + offset);
615        assert(is_read && "error reading from image or short read");
616    }
617}
618
619// Return the ImageModuleData for this image
620ImageModuleData * ImageFileReader::get_image_module_data() {
621        return module_data;
622}
623