1/* 2 * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25#ifndef SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP 26#define SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP 27 28#include "classfile/stringTable.hpp" 29#include "classfile/symbolTable.hpp" 30#include "oops/symbol.hpp" 31#include "services/diagnosticCommand.hpp" 32#include "utilities/hashtable.hpp" 33 34template <class T, class N> class CompactHashtable; 35class NumberSeq; 36class SimpleCompactHashtable; 37class SerializeClosure; 38 39// Stats for symbol tables in the CDS archive 40class CompactHashtableStats VALUE_OBJ_CLASS_SPEC { 41public: 42 int hashentry_count; 43 int hashentry_bytes; 44 int bucket_count; 45 int bucket_bytes; 46}; 47 48///////////////////////////////////////////////////////////////////////// 49// 50// The compact hash table writer. Used at dump time for writing out 51// the compact table to the shared archive. 52// 53// At dump time, the CompactHashtableWriter obtains all entries from the 54// symbol/string table and adds them to a new temporary hash table. The hash 55// table size (number of buckets) is calculated using 56// '(num_entries + bucket_size - 1) / bucket_size'. The default bucket 57// size is 4 and can be changed by -XX:SharedSymbolTableBucketSize option. 58// 4 is chosen because it produces smaller sized bucket on average for 59// faster lookup. It also has relatively small number of empty buckets and 60// good distribution of the entries. 61// 62// We use a simple hash function (hash % num_bucket) for the table. 63// The new table is compacted when written out. Please see comments 64// above the CompactHashtable class for the table layout detail. The bucket 65// offsets are written to the archive as part of the compact table. The 66// bucket offset is encoded in the low 30-bit (0-29) and the bucket type 67// (regular or compact) are encoded in bit[31, 30]. For buckets with more 68// than one entry, both hash and entry offset are written to the 69// table. For buckets with only one entry, only the entry offset is written 70// to the table and the buckets are tagged as compact in their type bits. 71// Buckets without entry are skipped from the table. Their offsets are 72// still written out for faster lookup. 73// 74class CompactHashtableWriter: public StackObj { 75public: 76 class Entry VALUE_OBJ_CLASS_SPEC { 77 unsigned int _hash; 78 u4 _value; 79 80 public: 81 Entry() {} 82 Entry(unsigned int hash, u4 val) : _hash(hash), _value(val) {} 83 84 u4 value() { 85 return _value; 86 } 87 unsigned int hash() { 88 return _hash; 89 } 90 91 bool operator==(const CompactHashtableWriter::Entry& other) { 92 return (_value == other._value && _hash == other._hash); 93 } 94 }; // class CompactHashtableWriter::Entry 95 96private: 97 int _num_entries; 98 int _num_buckets; 99 int _num_empty_buckets; 100 int _num_value_only_buckets; 101 int _num_other_buckets; 102 GrowableArray<Entry>** _buckets; 103 CompactHashtableStats* _stats; 104 Array<u4>* _compact_buckets; 105 Array<u4>* _compact_entries; 106 107public: 108 // This is called at dump-time only 109 CompactHashtableWriter(int num_buckets, CompactHashtableStats* stats); 110 ~CompactHashtableWriter(); 111 112 void add(unsigned int hash, u4 value); 113 void add(u4 value) { 114 add((unsigned int)value, value); 115 } 116 117private: 118 void allocate_table(); 119 void dump_table(NumberSeq* summary); 120 121public: 122 void dump(SimpleCompactHashtable *cht, const char* table_name); 123 const char* table_name(); 124}; 125 126class CompactSymbolTableWriter: public CompactHashtableWriter { 127public: 128 CompactSymbolTableWriter(int num_buckets, CompactHashtableStats* stats) : 129 CompactHashtableWriter(num_buckets, stats) {} 130 void add(unsigned int hash, Symbol *symbol); 131 void dump(CompactHashtable<Symbol*, char> *cht); 132}; 133 134class CompactStringTableWriter: public CompactHashtableWriter { 135public: 136 CompactStringTableWriter(int num_entries, CompactHashtableStats* stats) : 137 CompactHashtableWriter(num_entries, stats) {} 138 void add(unsigned int hash, oop string); 139 void dump(CompactHashtable<oop, char> *cht); 140}; 141 142#define REGULAR_BUCKET_TYPE 0 143#define VALUE_ONLY_BUCKET_TYPE 1 144#define TABLEEND_BUCKET_TYPE 3 145#define BUCKET_OFFSET_MASK 0x3FFFFFFF 146#define BUCKET_OFFSET(info) ((info) & BUCKET_OFFSET_MASK) 147#define BUCKET_TYPE_SHIFT 30 148#define BUCKET_TYPE(info) (((info) & ~BUCKET_OFFSET_MASK) >> BUCKET_TYPE_SHIFT) 149#define BUCKET_INFO(offset, type) (((type) << BUCKET_TYPE_SHIFT) | ((offset) & BUCKET_OFFSET_MASK)) 150 151///////////////////////////////////////////////////////////////////////////// 152// 153// CompactHashtable is used to stored the CDS archive's symbol/string table. Used 154// at runtime only to access the compact table from the archive. 155// 156// Because these tables are read-only (no entries can be added/deleted) at run-time 157// and tend to have large number of entries, we try to minimize the footprint 158// cost per entry. 159// 160// The CompactHashtable is split into two arrays 161// 162// u4 buckets[num_buckets+1]; // bit[31,30]: type; bit[29-0]: offset 163// u4 entries[<variable size>] 164// 165// The size of buckets[] is 'num_buckets + 1'. Each entry of 166// buckets[] is a 32-bit encoding of the bucket type and bucket offset, 167// with the type in the left-most 2-bit and offset in the remaining 30-bit. 168// The last entry is a special type. It contains the end of the last 169// bucket. 170// 171// There are two types of buckets, regular buckets and value_only buckets. The 172// value_only buckets have '01' in their highest 2-bit, and regular buckets have 173// '00' in their highest 2-bit. 174// 175// For normal buckets, each entry is 8 bytes in the entries[]: 176// u4 hash; /* symbol/string hash */ 177// union { 178// u4 offset; /* Symbol* sym = (Symbol*)(base_address + offset) */ 179// narrowOop str; /* String narrowOop encoding */ 180// } 181// 182// 183// For value_only buckets, each entry has only the 4-byte 'offset' in the entries[]. 184// 185// Example -- note that the second bucket is a VALUE_ONLY_BUCKET_TYPE so the hash code 186// is skipped. 187// buckets[0, 4, 5, ....] 188// | | | 189// | | +---+ 190// | | | 191// | +----+ | 192// v v v 193// entries[H,O,H,O,O,H,O,H,O.....] 194// 195// See CompactHashtable::lookup() for how the table is searched at runtime. 196// See CompactHashtableWriter::dump() for how the table is written at CDS 197// dump time. 198// 199class SimpleCompactHashtable VALUE_OBJ_CLASS_SPEC { 200protected: 201 address _base_address; 202 u4 _bucket_count; 203 u4 _entry_count; 204 u4* _buckets; 205 u4* _entries; 206 207public: 208 SimpleCompactHashtable() { 209 _entry_count = 0; 210 _bucket_count = 0; 211 _buckets = 0; 212 _entries = 0; 213 } 214 215 void reset() { 216 _bucket_count = 0; 217 _entry_count = 0; 218 _buckets = 0; 219 _entries = 0; 220 } 221 222 void init(address base_address, u4 entry_count, u4 bucket_count, u4* buckets, u4* entries) { 223 _base_address = base_address; 224 _bucket_count = bucket_count; 225 _entry_count = entry_count; 226 _buckets = buckets; 227 _entries = entries; 228 } 229 230 template <class I> inline void iterate(const I& iterator); 231 232 bool exists(u4 value); 233 234 // For reading from/writing to the CDS archive 235 void serialize(SerializeClosure* soc); 236}; 237 238template <class T, class N> class CompactHashtable : public SimpleCompactHashtable { 239 friend class VMStructs; 240 241public: 242 enum CompactHashtableType { 243 _symbol_table = 0, 244 _string_table = 1 245 }; 246 247private: 248 u4 _type; 249 250 inline Symbol* decode_entry(CompactHashtable<Symbol*, char>* const t, 251 u4 offset, const char* name, int len); 252 253 inline oop decode_entry(CompactHashtable<oop, char>* const t, 254 u4 offset, const char* name, int len); 255public: 256 CompactHashtable() : SimpleCompactHashtable() {} 257 258 void set_type(CompactHashtableType type) { 259 _type = (u4)type; 260 } 261 262 // Lookup an entry from the compact table 263 inline T lookup(const N* name, unsigned int hash, int len); 264 265 // iterate over symbols 266 void symbols_do(SymbolClosure *cl); 267 268 // iterate over strings 269 void oops_do(OopClosure* f); 270 271 // For reading from/writing to the CDS archive 272 void serialize(SerializeClosure* soc); 273 274 uintx base_address() { 275 return (uintx) _base_address; 276 } 277}; 278 279//////////////////////////////////////////////////////////////////////// 280// 281// Read/Write the contents of a hashtable textual dump (created by 282// SymbolTable::dump and StringTable::dump). 283// Because the dump file may be big (hundred of MB in extreme cases), 284// we use mmap for fast access when reading it. 285// 286class HashtableTextDump VALUE_OBJ_CLASS_SPEC { 287 int _fd; 288 const char* _base; 289 const char* _p; 290 const char* _end; 291 const char* _filename; 292 size_t _size; 293 int _prefix_type; 294 int _line_no; 295public: 296 HashtableTextDump(const char* filename); 297 ~HashtableTextDump(); 298 299 enum { 300 SymbolPrefix = 1 << 0, 301 StringPrefix = 1 << 1, 302 Unknown = 1 << 2 303 }; 304 305 void quit(const char* err, const char* msg); 306 307 inline int remain() { 308 return (int)(_end - _p); 309 } 310 311 void corrupted(const char *p, const char *msg); 312 313 inline void corrupted_if(bool cond, const char *msg) { 314 if (cond) { 315 corrupted(_p, msg); 316 } 317 } 318 319 bool skip_newline(); 320 int skip(char must_be_char); 321 void skip_past(char c); 322 void check_version(const char* ver); 323 324 inline void get_num(char delim, int *num) { 325 const char* p = _p; 326 const char* end = _end; 327 u8 n = 0; 328 329 while (p < end) { 330 char c = *p++; 331 if ('0' <= c && c <= '9') { 332 n = n * 10 + (c - '0'); 333 if (n > (u8)INT_MAX) { 334 corrupted(_p, "Num overflow"); 335 } 336 } else if (c == delim) { 337 _p = p; 338 *num = (int)n; 339 return; 340 } else { 341 // Not [0-9], not 'delim' 342 corrupted(_p, "Unrecognized format");; 343 } 344 } 345 346 corrupted(_end, "Incorrect format"); 347 ShouldNotReachHere(); 348 } 349 350 void scan_prefix_type(); 351 int scan_prefix(int* utf8_length); 352 int scan_string_prefix(); 353 int scan_symbol_prefix(); 354 355 jchar unescape(const char* from, const char* end, int count); 356 void get_utf8(char* utf8_buffer, int utf8_length); 357 static void put_utf8(outputStream* st, const char* utf8_string, int utf8_length); 358}; 359 360/////////////////////////////////////////////////////////////////////// 361// 362// jcmd command support for symbol table and string table dumping: 363// VM.symboltable -verbose: for dumping the symbol table 364// VM.stringtable -verbose: for dumping the string table 365// 366class VM_DumpHashtable : public VM_Operation { 367private: 368 outputStream* _out; 369 int _which; 370 bool _verbose; 371public: 372 enum { 373 DumpSymbols = 1 << 0, 374 DumpStrings = 1 << 1, 375 DumpSysDict = 1 << 2 // not implemented yet 376 }; 377 VM_DumpHashtable(outputStream* out, int which, bool verbose) { 378 _out = out; 379 _which = which; 380 _verbose = verbose; 381 } 382 383 virtual VMOp_Type type() const { return VMOp_DumpHashtable; } 384 385 virtual void doit() { 386 switch (_which) { 387 case DumpSymbols: 388 SymbolTable::dump(_out, _verbose); 389 break; 390 case DumpStrings: 391 StringTable::dump(_out, _verbose); 392 break; 393 default: 394 ShouldNotReachHere(); 395 } 396 } 397}; 398 399class SymboltableDCmd : public DCmdWithParser { 400protected: 401 DCmdArgument<bool> _verbose; 402public: 403 SymboltableDCmd(outputStream* output, bool heap); 404 static const char* name() { 405 return "VM.symboltable"; 406 } 407 static const char* description() { 408 return "Dump symbol table."; 409 } 410 static const char* impact() { 411 return "Medium: Depends on Java content."; 412 } 413 static const JavaPermission permission() { 414 JavaPermission p = {"java.lang.management.ManagementPermission", 415 "monitor", NULL}; 416 return p; 417 } 418 static int num_arguments(); 419 virtual void execute(DCmdSource source, TRAPS); 420}; 421 422class StringtableDCmd : public DCmdWithParser { 423protected: 424 DCmdArgument<bool> _verbose; 425public: 426 StringtableDCmd(outputStream* output, bool heap); 427 static const char* name() { 428 return "VM.stringtable"; 429 } 430 static const char* description() { 431 return "Dump string table."; 432 } 433 static const char* impact() { 434 return "Medium: Depends on Java content."; 435 } 436 static const JavaPermission permission() { 437 JavaPermission p = {"java.lang.management.ManagementPermission", 438 "monitor", NULL}; 439 return p; 440 } 441 static int num_arguments(); 442 virtual void execute(DCmdSource source, TRAPS); 443}; 444 445#endif // SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP 446