1/*
2 * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#ifndef SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP
26#define SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP
27
28#include "classfile/stringTable.hpp"
29#include "classfile/symbolTable.hpp"
30#include "oops/symbol.hpp"
31#include "services/diagnosticCommand.hpp"
32#include "utilities/hashtable.hpp"
33
34template <class T, class N> class CompactHashtable;
35class NumberSeq;
36class SimpleCompactHashtable;
37class SerializeClosure;
38
39// Stats for symbol tables in the CDS archive
40class CompactHashtableStats VALUE_OBJ_CLASS_SPEC {
41public:
42  int hashentry_count;
43  int hashentry_bytes;
44  int bucket_count;
45  int bucket_bytes;
46};
47
48/////////////////////////////////////////////////////////////////////////
49//
50// The compact hash table writer. Used at dump time for writing out
51// the compact table to the shared archive.
52//
53// At dump time, the CompactHashtableWriter obtains all entries from the
54// symbol/string table and adds them to a new temporary hash table. The hash
55// table size (number of buckets) is calculated using
56// '(num_entries + bucket_size - 1) / bucket_size'. The default bucket
57// size is 4 and can be changed by -XX:SharedSymbolTableBucketSize option.
58// 4 is chosen because it produces smaller sized bucket on average for
59// faster lookup. It also has relatively small number of empty buckets and
60// good distribution of the entries.
61//
62// We use a simple hash function (hash % num_bucket) for the table.
63// The new table is compacted when written out. Please see comments
64// above the CompactHashtable class for the table layout detail. The bucket
65// offsets are written to the archive as part of the compact table. The
66// bucket offset is encoded in the low 30-bit (0-29) and the bucket type
67// (regular or compact) are encoded in bit[31, 30]. For buckets with more
68// than one entry, both hash and entry offset are written to the
69// table. For buckets with only one entry, only the entry offset is written
70// to the table and the buckets are tagged as compact in their type bits.
71// Buckets without entry are skipped from the table. Their offsets are
72// still written out for faster lookup.
73//
74class CompactHashtableWriter: public StackObj {
75public:
76  class Entry VALUE_OBJ_CLASS_SPEC {
77    unsigned int _hash;
78    u4 _value;
79
80  public:
81    Entry() {}
82    Entry(unsigned int hash, u4 val) : _hash(hash), _value(val) {}
83
84    u4 value() {
85      return _value;
86    }
87    unsigned int hash() {
88      return _hash;
89    }
90
91    bool operator==(const CompactHashtableWriter::Entry& other) {
92      return (_value == other._value && _hash == other._hash);
93    }
94  }; // class CompactHashtableWriter::Entry
95
96private:
97  int _num_entries;
98  int _num_buckets;
99  int _num_empty_buckets;
100  int _num_value_only_buckets;
101  int _num_other_buckets;
102  GrowableArray<Entry>** _buckets;
103  CompactHashtableStats* _stats;
104  Array<u4>* _compact_buckets;
105  Array<u4>* _compact_entries;
106
107public:
108  // This is called at dump-time only
109  CompactHashtableWriter(int num_buckets, CompactHashtableStats* stats);
110  ~CompactHashtableWriter();
111
112  void add(unsigned int hash, u4 value);
113  void add(u4 value) {
114    add((unsigned int)value, value);
115  }
116
117private:
118  void allocate_table();
119  void dump_table(NumberSeq* summary);
120
121public:
122  void dump(SimpleCompactHashtable *cht, const char* table_name);
123  const char* table_name();
124};
125
126class CompactSymbolTableWriter: public CompactHashtableWriter {
127public:
128  CompactSymbolTableWriter(int num_buckets, CompactHashtableStats* stats) :
129    CompactHashtableWriter(num_buckets, stats) {}
130  void add(unsigned int hash, Symbol *symbol);
131  void dump(CompactHashtable<Symbol*, char> *cht);
132};
133
134class CompactStringTableWriter: public CompactHashtableWriter {
135public:
136  CompactStringTableWriter(int num_entries, CompactHashtableStats* stats) :
137    CompactHashtableWriter(num_entries, stats) {}
138  void add(unsigned int hash, oop string);
139  void dump(CompactHashtable<oop, char> *cht);
140};
141
142#define REGULAR_BUCKET_TYPE       0
143#define VALUE_ONLY_BUCKET_TYPE    1
144#define TABLEEND_BUCKET_TYPE      3
145#define BUCKET_OFFSET_MASK        0x3FFFFFFF
146#define BUCKET_OFFSET(info)       ((info) & BUCKET_OFFSET_MASK)
147#define BUCKET_TYPE_SHIFT         30
148#define BUCKET_TYPE(info)         (((info) & ~BUCKET_OFFSET_MASK) >> BUCKET_TYPE_SHIFT)
149#define BUCKET_INFO(offset, type) (((type) << BUCKET_TYPE_SHIFT) | ((offset) & BUCKET_OFFSET_MASK))
150
151/////////////////////////////////////////////////////////////////////////////
152//
153// CompactHashtable is used to stored the CDS archive's symbol/string table. Used
154// at runtime only to access the compact table from the archive.
155//
156// Because these tables are read-only (no entries can be added/deleted) at run-time
157// and tend to have large number of entries, we try to minimize the footprint
158// cost per entry.
159//
160// The CompactHashtable is split into two arrays
161//
162//   u4 buckets[num_buckets+1]; // bit[31,30]: type; bit[29-0]: offset
163//   u4 entries[<variable size>]
164//
165// The size of buckets[] is 'num_buckets + 1'. Each entry of
166// buckets[] is a 32-bit encoding of the bucket type and bucket offset,
167// with the type in the left-most 2-bit and offset in the remaining 30-bit.
168// The last entry is a special type. It contains the end of the last
169// bucket.
170//
171// There are two types of buckets, regular buckets and value_only buckets. The
172// value_only buckets have '01' in their highest 2-bit, and regular buckets have
173// '00' in their highest 2-bit.
174//
175// For normal buckets, each entry is 8 bytes in the entries[]:
176//   u4 hash;    /* symbol/string hash */
177//   union {
178//     u4 offset;  /* Symbol* sym = (Symbol*)(base_address + offset) */
179//     narrowOop str; /* String narrowOop encoding */
180//   }
181//
182//
183// For value_only buckets, each entry has only the 4-byte 'offset' in the entries[].
184//
185// Example -- note that the second bucket is a VALUE_ONLY_BUCKET_TYPE so the hash code
186//            is skipped.
187// buckets[0, 4, 5, ....]
188//         |  |  |
189//         |  |  +---+
190//         |  |      |
191//         |  +----+ |
192//         v       v v
193// entries[H,O,H,O,O,H,O,H,O.....]
194//
195// See CompactHashtable::lookup() for how the table is searched at runtime.
196// See CompactHashtableWriter::dump() for how the table is written at CDS
197// dump time.
198//
199class SimpleCompactHashtable VALUE_OBJ_CLASS_SPEC {
200protected:
201  address  _base_address;
202  u4  _bucket_count;
203  u4  _entry_count;
204  u4* _buckets;
205  u4* _entries;
206
207public:
208  SimpleCompactHashtable() {
209    _entry_count = 0;
210    _bucket_count = 0;
211    _buckets = 0;
212    _entries = 0;
213  }
214
215  void reset() {
216    _bucket_count = 0;
217    _entry_count = 0;
218    _buckets = 0;
219    _entries = 0;
220  }
221
222  void init(address base_address, u4 entry_count, u4 bucket_count, u4* buckets, u4* entries) {
223    _base_address = base_address;
224    _bucket_count = bucket_count;
225    _entry_count = entry_count;
226    _buckets = buckets;
227    _entries = entries;
228  }
229
230  template <class I> inline void iterate(const I& iterator);
231
232  bool exists(u4 value);
233
234  // For reading from/writing to the CDS archive
235  void serialize(SerializeClosure* soc);
236};
237
238template <class T, class N> class CompactHashtable : public SimpleCompactHashtable {
239  friend class VMStructs;
240
241public:
242  enum CompactHashtableType {
243    _symbol_table = 0,
244    _string_table = 1
245  };
246
247private:
248  u4 _type;
249
250  inline Symbol* decode_entry(CompactHashtable<Symbol*, char>* const t,
251                              u4 offset, const char* name, int len);
252
253  inline oop decode_entry(CompactHashtable<oop, char>* const t,
254                          u4 offset, const char* name, int len);
255public:
256  CompactHashtable() : SimpleCompactHashtable() {}
257
258  void set_type(CompactHashtableType type) {
259    _type = (u4)type;
260  }
261
262  // Lookup an entry from the compact table
263  inline T lookup(const N* name, unsigned int hash, int len);
264
265  // iterate over symbols
266  void symbols_do(SymbolClosure *cl);
267
268  // iterate over strings
269  void oops_do(OopClosure* f);
270
271  // For reading from/writing to the CDS archive
272  void serialize(SerializeClosure* soc);
273
274  uintx base_address() {
275    return (uintx) _base_address;
276  }
277};
278
279////////////////////////////////////////////////////////////////////////
280//
281// Read/Write the contents of a hashtable textual dump (created by
282// SymbolTable::dump and StringTable::dump).
283// Because the dump file may be big (hundred of MB in extreme cases),
284// we use mmap for fast access when reading it.
285//
286class HashtableTextDump VALUE_OBJ_CLASS_SPEC {
287  int _fd;
288  const char* _base;
289  const char* _p;
290  const char* _end;
291  const char* _filename;
292  size_t      _size;
293  int         _prefix_type;
294  int         _line_no;
295public:
296  HashtableTextDump(const char* filename);
297  ~HashtableTextDump();
298
299  enum {
300    SymbolPrefix = 1 << 0,
301    StringPrefix = 1 << 1,
302    Unknown = 1 << 2
303  };
304
305  void quit(const char* err, const char* msg);
306
307  inline int remain() {
308    return (int)(_end - _p);
309  }
310
311  void corrupted(const char *p, const char *msg);
312
313  inline void corrupted_if(bool cond, const char *msg) {
314    if (cond) {
315      corrupted(_p, msg);
316    }
317  }
318
319  bool skip_newline();
320  int skip(char must_be_char);
321  void skip_past(char c);
322  void check_version(const char* ver);
323
324  inline void get_num(char delim, int *num) {
325    const char* p   = _p;
326    const char* end = _end;
327    u8 n = 0;
328
329    while (p < end) {
330      char c = *p++;
331      if ('0' <= c && c <= '9') {
332        n = n * 10 + (c - '0');
333        if (n > (u8)INT_MAX) {
334          corrupted(_p, "Num overflow");
335        }
336      } else if (c == delim) {
337        _p = p;
338        *num = (int)n;
339        return;
340      } else {
341        // Not [0-9], not 'delim'
342        corrupted(_p, "Unrecognized format");;
343      }
344    }
345
346    corrupted(_end, "Incorrect format");
347    ShouldNotReachHere();
348  }
349
350  void scan_prefix_type();
351  int scan_prefix(int* utf8_length);
352  int scan_string_prefix();
353  int scan_symbol_prefix();
354
355  jchar unescape(const char* from, const char* end, int count);
356  void get_utf8(char* utf8_buffer, int utf8_length);
357  static void put_utf8(outputStream* st, const char* utf8_string, int utf8_length);
358};
359
360///////////////////////////////////////////////////////////////////////
361//
362// jcmd command support for symbol table and string table dumping:
363//   VM.symboltable -verbose: for dumping the symbol table
364//   VM.stringtable -verbose: for dumping the string table
365//
366class VM_DumpHashtable : public VM_Operation {
367private:
368  outputStream* _out;
369  int _which;
370  bool _verbose;
371public:
372  enum {
373    DumpSymbols = 1 << 0,
374    DumpStrings = 1 << 1,
375    DumpSysDict = 1 << 2  // not implemented yet
376  };
377  VM_DumpHashtable(outputStream* out, int which, bool verbose) {
378    _out = out;
379    _which = which;
380    _verbose = verbose;
381  }
382
383  virtual VMOp_Type type() const { return VMOp_DumpHashtable; }
384
385  virtual void doit() {
386    switch (_which) {
387    case DumpSymbols:
388      SymbolTable::dump(_out, _verbose);
389      break;
390    case DumpStrings:
391      StringTable::dump(_out, _verbose);
392      break;
393    default:
394      ShouldNotReachHere();
395    }
396  }
397};
398
399class SymboltableDCmd : public DCmdWithParser {
400protected:
401  DCmdArgument<bool> _verbose;
402public:
403  SymboltableDCmd(outputStream* output, bool heap);
404  static const char* name() {
405    return "VM.symboltable";
406  }
407  static const char* description() {
408    return "Dump symbol table.";
409  }
410  static const char* impact() {
411    return "Medium: Depends on Java content.";
412  }
413  static const JavaPermission permission() {
414    JavaPermission p = {"java.lang.management.ManagementPermission",
415                        "monitor", NULL};
416    return p;
417  }
418  static int num_arguments();
419  virtual void execute(DCmdSource source, TRAPS);
420};
421
422class StringtableDCmd : public DCmdWithParser {
423protected:
424  DCmdArgument<bool> _verbose;
425public:
426  StringtableDCmd(outputStream* output, bool heap);
427  static const char* name() {
428    return "VM.stringtable";
429  }
430  static const char* description() {
431    return "Dump string table.";
432  }
433  static const char* impact() {
434    return "Medium: Depends on Java content.";
435  }
436  static const JavaPermission permission() {
437    JavaPermission p = {"java.lang.management.ManagementPermission",
438                        "monitor", NULL};
439    return p;
440  }
441  static int num_arguments();
442  virtual void execute(DCmdSource source, TRAPS);
443};
444
445#endif // SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP
446