symbolTable.cpp revision 3465:d2a62e0f25eb
1/*
2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "classfile/altHashing.hpp"
27#include "classfile/javaClasses.hpp"
28#include "classfile/symbolTable.hpp"
29#include "classfile/systemDictionary.hpp"
30#include "gc_interface/collectedHeap.inline.hpp"
31#include "memory/allocation.inline.hpp"
32#include "memory/filemap.hpp"
33#include "memory/gcLocker.inline.hpp"
34#include "oops/oop.inline.hpp"
35#include "oops/oop.inline2.hpp"
36#include "runtime/mutexLocker.hpp"
37#include "utilities/hashtable.inline.hpp"
38#include "utilities/numberSeq.hpp"
39
40// --------------------------------------------------------------------------
41
42SymbolTable* SymbolTable::_the_table = NULL;
43// Static arena for symbols that are not deallocated
44Arena* SymbolTable::_arena = NULL;
45bool SymbolTable::_needs_rehashing = false;
46jint SymbolTable::_seed = 0;
47
48Symbol* SymbolTable::allocate_symbol(const u1* name, int len, bool c_heap, TRAPS) {
49  assert (len <= Symbol::max_length(), "should be checked by caller");
50
51  Symbol* sym;
52  // Allocate symbols in the C heap when dumping shared spaces in case there
53  // are temporary symbols we can remove.
54  if (c_heap || DumpSharedSpaces) {
55    // refcount starts as 1
56    sym = new (len, THREAD) Symbol(name, len, 1);
57  } else {
58    sym = new (len, arena(), THREAD) Symbol(name, len, -1);
59  }
60  assert(sym != NULL, "new should call vm_exit_out_of_memory if C_HEAP is exhausted");
61  return sym;
62}
63
64void SymbolTable::initialize_symbols(int arena_alloc_size) {
65  // Initialize the arena for global symbols, size passed in depends on CDS.
66  if (arena_alloc_size == 0) {
67    _arena = new (mtSymbol) Arena();
68  } else {
69    _arena = new (mtSymbol) Arena(arena_alloc_size);
70  }
71}
72
73// Call function for all symbols in the symbol table.
74void SymbolTable::symbols_do(SymbolClosure *cl) {
75  const int n = the_table()->table_size();
76  for (int i = 0; i < n; i++) {
77    for (HashtableEntry<Symbol*, mtSymbol>* p = the_table()->bucket(i);
78         p != NULL;
79         p = p->next()) {
80      cl->do_symbol(p->literal_addr());
81    }
82  }
83}
84
85int SymbolTable::symbols_removed = 0;
86int SymbolTable::symbols_counted = 0;
87
88// Remove unreferenced symbols from the symbol table
89// This is done late during GC.
90void SymbolTable::unlink() {
91  int removed = 0;
92  int total = 0;
93  size_t memory_total = 0;
94  for (int i = 0; i < the_table()->table_size(); ++i) {
95    HashtableEntry<Symbol*, mtSymbol>** p = the_table()->bucket_addr(i);
96    HashtableEntry<Symbol*, mtSymbol>* entry = the_table()->bucket(i);
97    while (entry != NULL) {
98      // Shared entries are normally at the end of the bucket and if we run into
99      // a shared entry, then there is nothing more to remove. However, if we
100      // have rehashed the table, then the shared entries are no longer at the
101      // end of the bucket.
102      if (entry->is_shared() && !use_alternate_hashcode()) {
103        break;
104      }
105      Symbol* s = entry->literal();
106      memory_total += s->object_size();
107      total++;
108      assert(s != NULL, "just checking");
109      // If reference count is zero, remove.
110      if (s->refcount() == 0) {
111        assert(!entry->is_shared(), "shared entries should be kept live");
112        delete s;
113        removed++;
114        *p = entry->next();
115        the_table()->free_entry(entry);
116      } else {
117        p = entry->next_addr();
118      }
119      // get next entry
120      entry = (HashtableEntry<Symbol*, mtSymbol>*)HashtableEntry<Symbol*, mtSymbol>::make_ptr(*p);
121    }
122  }
123  symbols_removed += removed;
124  symbols_counted += total;
125  // Exclude printing for normal PrintGCDetails because people parse
126  // this output.
127  if (PrintGCDetails && Verbose && WizardMode) {
128    gclog_or_tty->print(" [Symbols=%d size=" SIZE_FORMAT "K] ", total,
129                        (memory_total*HeapWordSize)/1024);
130  }
131}
132
133unsigned int SymbolTable::new_hash(Symbol* sym) {
134  ResourceMark rm;
135  // Use alternate hashing algorithm on this symbol.
136  return AltHashing::murmur3_32(seed(), (const jbyte*)sym->as_C_string(), sym->utf8_length());
137}
138
139// Create a new table and using alternate hash code, populate the new table
140// with the existing strings.   Set flag to use the alternate hash code afterwards.
141void SymbolTable::rehash_table() {
142  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
143  // This should never happen with -Xshare:dump but it might in testing mode.
144  if (DumpSharedSpaces) return;
145  // Create a new symbol table
146  SymbolTable* new_table = new SymbolTable();
147
148  // Initialize the global seed for hashing.
149  _seed = AltHashing::compute_seed();
150  assert(seed() != 0, "shouldn't be zero");
151
152  the_table()->move_to(new_table);
153
154  // Delete the table and buckets (entries are reused in new table).
155  delete _the_table;
156  // Don't check if we need rehashing until the table gets unbalanced again.
157  // Then rehash with a new global seed.
158  _needs_rehashing = false;
159  _the_table = new_table;
160}
161
162// Lookup a symbol in a bucket.
163
164Symbol* SymbolTable::lookup(int index, const char* name,
165                              int len, unsigned int hash) {
166  int count = 0;
167  for (HashtableEntry<Symbol*, mtSymbol>* e = bucket(index); e != NULL; e = e->next()) {
168    count++;  // count all entries in this bucket, not just ones with same hash
169    if (e->hash() == hash) {
170      Symbol* sym = e->literal();
171      if (sym->equals(name, len)) {
172        // something is referencing this symbol now.
173        sym->increment_refcount();
174        return sym;
175      }
176    }
177  }
178  // If the bucket size is too deep check if this hash code is insufficient.
179  if (count >= BasicHashtable<mtSymbol>::rehash_count && !needs_rehashing()) {
180    _needs_rehashing = check_rehash_table(count);
181  }
182  return NULL;
183}
184
185// Pick hashing algorithm.
186unsigned int SymbolTable::hash_symbol(const char* s, int len) {
187  return use_alternate_hashcode() ?
188           AltHashing::murmur3_32(seed(), (const jbyte*)s, len) :
189           java_lang_String::to_hash(s, len);
190}
191
192
193// We take care not to be blocking while holding the
194// SymbolTable_lock. Otherwise, the system might deadlock, since the
195// symboltable is used during compilation (VM_thread) The lock free
196// synchronization is simplified by the fact that we do not delete
197// entries in the symbol table during normal execution (only during
198// safepoints).
199
200Symbol* SymbolTable::lookup(const char* name, int len, TRAPS) {
201  unsigned int hashValue = hash_symbol(name, len);
202  int index = the_table()->hash_to_index(hashValue);
203
204  Symbol* s = the_table()->lookup(index, name, len, hashValue);
205
206  // Found
207  if (s != NULL) return s;
208
209  // Grab SymbolTable_lock first.
210  MutexLocker ml(SymbolTable_lock, THREAD);
211
212  // Otherwise, add to symbol to table
213  return the_table()->basic_add(index, (u1*)name, len, hashValue, true, CHECK_NULL);
214}
215
216Symbol* SymbolTable::lookup(const Symbol* sym, int begin, int end, TRAPS) {
217  char* buffer;
218  int index, len;
219  unsigned int hashValue;
220  char* name;
221  {
222    debug_only(No_Safepoint_Verifier nsv;)
223
224    name = (char*)sym->base() + begin;
225    len = end - begin;
226    hashValue = hash_symbol(name, len);
227    index = the_table()->hash_to_index(hashValue);
228    Symbol* s = the_table()->lookup(index, name, len, hashValue);
229
230    // Found
231    if (s != NULL) return s;
232  }
233
234  // Otherwise, add to symbol to table. Copy to a C string first.
235  char stack_buf[128];
236  ResourceMark rm(THREAD);
237  if (len <= 128) {
238    buffer = stack_buf;
239  } else {
240    buffer = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, char, len);
241  }
242  for (int i=0; i<len; i++) {
243    buffer[i] = name[i];
244  }
245  // Make sure there is no safepoint in the code above since name can't move.
246  // We can't include the code in No_Safepoint_Verifier because of the
247  // ResourceMark.
248
249  // Grab SymbolTable_lock first.
250  MutexLocker ml(SymbolTable_lock, THREAD);
251
252  return the_table()->basic_add(index, (u1*)buffer, len, hashValue, true, CHECK_NULL);
253}
254
255Symbol* SymbolTable::lookup_only(const char* name, int len,
256                                   unsigned int& hash) {
257  hash = hash_symbol(name, len);
258  int index = the_table()->hash_to_index(hash);
259
260  Symbol* s = the_table()->lookup(index, name, len, hash);
261  return s;
262}
263
264// Look up the address of the literal in the SymbolTable for this Symbol*
265// Do not create any new symbols
266// Do not increment the reference count to keep this alive
267Symbol** SymbolTable::lookup_symbol_addr(Symbol* sym){
268  unsigned int hash = hash_symbol((char*)sym->bytes(), sym->utf8_length());
269  int index = the_table()->hash_to_index(hash);
270
271  for (HashtableEntry<Symbol*, mtSymbol>* e = the_table()->bucket(index); e != NULL; e = e->next()) {
272    if (e->hash() == hash) {
273      Symbol* literal_sym = e->literal();
274      if (sym == literal_sym) {
275        return e->literal_addr();
276      }
277    }
278  }
279  return NULL;
280}
281
282// Suggestion: Push unicode-based lookup all the way into the hashing
283// and probing logic, so there is no need for convert_to_utf8 until
284// an actual new Symbol* is created.
285Symbol* SymbolTable::lookup_unicode(const jchar* name, int utf16_length, TRAPS) {
286  int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
287  char stack_buf[128];
288  if (utf8_length < (int) sizeof(stack_buf)) {
289    char* chars = stack_buf;
290    UNICODE::convert_to_utf8(name, utf16_length, chars);
291    return lookup(chars, utf8_length, THREAD);
292  } else {
293    ResourceMark rm(THREAD);
294    char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);;
295    UNICODE::convert_to_utf8(name, utf16_length, chars);
296    return lookup(chars, utf8_length, THREAD);
297  }
298}
299
300Symbol* SymbolTable::lookup_only_unicode(const jchar* name, int utf16_length,
301                                           unsigned int& hash) {
302  int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
303  char stack_buf[128];
304  if (utf8_length < (int) sizeof(stack_buf)) {
305    char* chars = stack_buf;
306    UNICODE::convert_to_utf8(name, utf16_length, chars);
307    return lookup_only(chars, utf8_length, hash);
308  } else {
309    ResourceMark rm;
310    char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);;
311    UNICODE::convert_to_utf8(name, utf16_length, chars);
312    return lookup_only(chars, utf8_length, hash);
313  }
314}
315
316void SymbolTable::add(Handle class_loader, constantPoolHandle cp,
317                      int names_count,
318                      const char** names, int* lengths, int* cp_indices,
319                      unsigned int* hashValues, TRAPS) {
320  // Grab SymbolTable_lock first.
321  MutexLocker ml(SymbolTable_lock, THREAD);
322
323  SymbolTable* table = the_table();
324  bool added = table->basic_add(class_loader, cp, names_count, names, lengths,
325                                cp_indices, hashValues, CHECK);
326  if (!added) {
327    // do it the hard way
328    for (int i=0; i<names_count; i++) {
329      int index = table->hash_to_index(hashValues[i]);
330      bool c_heap = class_loader() != NULL;
331      Symbol* sym = table->basic_add(index, (u1*)names[i], lengths[i], hashValues[i], c_heap, CHECK);
332      cp->symbol_at_put(cp_indices[i], sym);
333    }
334  }
335}
336
337Symbol* SymbolTable::new_permanent_symbol(const char* name, TRAPS) {
338  unsigned int hash;
339  Symbol* result = SymbolTable::lookup_only((char*)name, (int)strlen(name), hash);
340  if (result != NULL) {
341    return result;
342  }
343  // Grab SymbolTable_lock first.
344  MutexLocker ml(SymbolTable_lock, THREAD);
345
346  SymbolTable* table = the_table();
347  int index = table->hash_to_index(hash);
348  return table->basic_add(index, (u1*)name, (int)strlen(name), hash, false, THREAD);
349}
350
351Symbol* SymbolTable::basic_add(int index_arg, u1 *name, int len,
352                               unsigned int hashValue_arg, bool c_heap, TRAPS) {
353  assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(),
354         "proposed name of symbol must be stable");
355
356  // Don't allow symbols to be created which cannot fit in a Symbol*.
357  if (len > Symbol::max_length()) {
358    THROW_MSG_0(vmSymbols::java_lang_InternalError(),
359                "name is too long to represent");
360  }
361
362  // Cannot hit a safepoint in this function because the "this" pointer can move.
363  No_Safepoint_Verifier nsv;
364
365  // Check if the symbol table has been rehashed, if so, need to recalculate
366  // the hash value and index.
367  unsigned int hashValue;
368  int index;
369  if (use_alternate_hashcode()) {
370    hashValue = hash_symbol((const char*)name, len);
371    index = hash_to_index(hashValue);
372  } else {
373    hashValue = hashValue_arg;
374    index = index_arg;
375  }
376
377  // Since look-up was done lock-free, we need to check if another
378  // thread beat us in the race to insert the symbol.
379  Symbol* test = lookup(index, (char*)name, len, hashValue);
380  if (test != NULL) {
381    // A race occurred and another thread introduced the symbol.
382    assert(test->refcount() != 0, "lookup should have incremented the count");
383    return test;
384  }
385
386  // Create a new symbol.
387  Symbol* sym = allocate_symbol(name, len, c_heap, CHECK_NULL);
388  assert(sym->equals((char*)name, len), "symbol must be properly initialized");
389
390  HashtableEntry<Symbol*, mtSymbol>* entry = new_entry(hashValue, sym);
391  add_entry(index, entry);
392  return sym;
393}
394
395// This version of basic_add adds symbols in batch from the constant pool
396// parsing.
397bool SymbolTable::basic_add(Handle class_loader, constantPoolHandle cp,
398                            int names_count,
399                            const char** names, int* lengths,
400                            int* cp_indices, unsigned int* hashValues,
401                            TRAPS) {
402
403  // Check symbol names are not too long.  If any are too long, don't add any.
404  for (int i = 0; i< names_count; i++) {
405    if (lengths[i] > Symbol::max_length()) {
406      THROW_MSG_0(vmSymbols::java_lang_InternalError(),
407                  "name is too long to represent");
408    }
409  }
410
411  // Cannot hit a safepoint in this function because the "this" pointer can move.
412  No_Safepoint_Verifier nsv;
413
414  for (int i=0; i<names_count; i++) {
415    // Check if the symbol table has been rehashed, if so, need to recalculate
416    // the hash value.
417    unsigned int hashValue;
418    if (use_alternate_hashcode()) {
419      hashValue = hash_symbol(names[i], lengths[i]);
420    } else {
421      hashValue = hashValues[i];
422    }
423    // Since look-up was done lock-free, we need to check if another
424    // thread beat us in the race to insert the symbol.
425    int index = hash_to_index(hashValue);
426    Symbol* test = lookup(index, names[i], lengths[i], hashValue);
427    if (test != NULL) {
428      // A race occurred and another thread introduced the symbol, this one
429      // will be dropped and collected. Use test instead.
430      cp->symbol_at_put(cp_indices[i], test);
431      assert(test->refcount() != 0, "lookup should have incremented the count");
432    } else {
433      // Create a new symbol.  The null class loader is never unloaded so these
434      // are allocated specially in a permanent arena.
435      bool c_heap = class_loader() != NULL;
436      Symbol* sym = allocate_symbol((const u1*)names[i], lengths[i], c_heap, CHECK_(false));
437      assert(sym->equals(names[i], lengths[i]), "symbol must be properly initialized");  // why wouldn't it be???
438      HashtableEntry<Symbol*, mtSymbol>* entry = new_entry(hashValue, sym);
439      add_entry(index, entry);
440      cp->symbol_at_put(cp_indices[i], sym);
441    }
442  }
443  return true;
444}
445
446
447void SymbolTable::verify() {
448  for (int i = 0; i < the_table()->table_size(); ++i) {
449    HashtableEntry<Symbol*, mtSymbol>* p = the_table()->bucket(i);
450    for ( ; p != NULL; p = p->next()) {
451      Symbol* s = (Symbol*)(p->literal());
452      guarantee(s != NULL, "symbol is NULL");
453      unsigned int h = hash_symbol((char*)s->bytes(), s->utf8_length());
454      guarantee(p->hash() == h, "broken hash in symbol table entry");
455      guarantee(the_table()->hash_to_index(h) == i,
456                "wrong index in symbol table");
457    }
458  }
459}
460
461void SymbolTable::dump(outputStream* st) {
462  NumberSeq summary;
463  for (int i = 0; i < the_table()->table_size(); ++i) {
464    int count = 0;
465    for (HashtableEntry<Symbol*, mtSymbol>* e = the_table()->bucket(i);
466       e != NULL; e = e->next()) {
467      count++;
468    }
469    summary.add((double)count);
470  }
471  st->print_cr("SymbolTable statistics:");
472  st->print_cr("Number of buckets       : %7d", summary.num());
473  st->print_cr("Average bucket size     : %7.0f", summary.avg());
474  st->print_cr("Variance of bucket size : %7.0f", summary.variance());
475  st->print_cr("Std. dev. of bucket size: %7.0f", summary.sd());
476  st->print_cr("Maximum bucket size     : %7.0f", summary.maximum());
477}
478
479
480//---------------------------------------------------------------------------
481// Non-product code
482
483#ifndef PRODUCT
484
485void SymbolTable::print_histogram() {
486  MutexLocker ml(SymbolTable_lock);
487  const int results_length = 100;
488  int results[results_length];
489  int i,j;
490
491  // initialize results to zero
492  for (j = 0; j < results_length; j++) {
493    results[j] = 0;
494  }
495
496  int total = 0;
497  int max_symbols = 0;
498  int out_of_range = 0;
499  int memory_total = 0;
500  int count = 0;
501  for (i = 0; i < the_table()->table_size(); i++) {
502    HashtableEntry<Symbol*, mtSymbol>* p = the_table()->bucket(i);
503    for ( ; p != NULL; p = p->next()) {
504      memory_total += p->literal()->object_size();
505      count++;
506      int counter = p->literal()->utf8_length();
507      total += counter;
508      if (counter < results_length) {
509        results[counter]++;
510      } else {
511        out_of_range++;
512      }
513      max_symbols = MAX2(max_symbols, counter);
514    }
515  }
516  tty->print_cr("Symbol Table:");
517  tty->print_cr("Total number of symbols  %5d", count);
518  tty->print_cr("Total size in memory     %5dK",
519          (memory_total*HeapWordSize)/1024);
520  tty->print_cr("Total counted            %5d", symbols_counted);
521  tty->print_cr("Total removed            %5d", symbols_removed);
522  if (symbols_counted > 0) {
523    tty->print_cr("Percent removed          %3.2f",
524          ((float)symbols_removed/(float)symbols_counted)* 100);
525  }
526  tty->print_cr("Reference counts         %5d", Symbol::_total_count);
527  tty->print_cr("Symbol arena size        %5d used %5d",
528                 arena()->size_in_bytes(), arena()->used());
529  tty->print_cr("Histogram of symbol length:");
530  tty->print_cr("%8s %5d", "Total  ", total);
531  tty->print_cr("%8s %5d", "Maximum", max_symbols);
532  tty->print_cr("%8s %3.2f", "Average",
533          ((float) total / (float) the_table()->table_size()));
534  tty->print_cr("%s", "Histogram:");
535  tty->print_cr(" %s %29s", "Length", "Number chains that length");
536  for (i = 0; i < results_length; i++) {
537    if (results[i] > 0) {
538      tty->print_cr("%6d %10d", i, results[i]);
539    }
540  }
541  if (Verbose) {
542    int line_length = 70;
543    tty->print_cr("%s %30s", " Length", "Number chains that length");
544    for (i = 0; i < results_length; i++) {
545      if (results[i] > 0) {
546        tty->print("%4d", i);
547        for (j = 0; (j < results[i]) && (j < line_length);  j++) {
548          tty->print("%1s", "*");
549        }
550        if (j == line_length) {
551          tty->print("%1s", "+");
552        }
553        tty->cr();
554      }
555    }
556  }
557  tty->print_cr(" %s %d: %d\n", "Number chains longer than",
558                    results_length, out_of_range);
559}
560
561void SymbolTable::print() {
562  for (int i = 0; i < the_table()->table_size(); ++i) {
563    HashtableEntry<Symbol*, mtSymbol>** p = the_table()->bucket_addr(i);
564    HashtableEntry<Symbol*, mtSymbol>* entry = the_table()->bucket(i);
565    if (entry != NULL) {
566      while (entry != NULL) {
567        tty->print(PTR_FORMAT " ", entry->literal());
568        entry->literal()->print();
569        tty->print(" %d", entry->literal()->refcount());
570        p = entry->next_addr();
571        entry = (HashtableEntry<Symbol*, mtSymbol>*)HashtableEntry<Symbol*, mtSymbol>::make_ptr(*p);
572      }
573      tty->cr();
574    }
575  }
576}
577#endif // PRODUCT
578
579// --------------------------------------------------------------------------
580
581#ifdef ASSERT
582class StableMemoryChecker : public StackObj {
583  enum { _bufsize = wordSize*4 };
584
585  address _region;
586  jint    _size;
587  u1      _save_buf[_bufsize];
588
589  int sample(u1* save_buf) {
590    if (_size <= _bufsize) {
591      memcpy(save_buf, _region, _size);
592      return _size;
593    } else {
594      // copy head and tail
595      memcpy(&save_buf[0],          _region,                      _bufsize/2);
596      memcpy(&save_buf[_bufsize/2], _region + _size - _bufsize/2, _bufsize/2);
597      return (_bufsize/2)*2;
598    }
599  }
600
601 public:
602  StableMemoryChecker(const void* region, jint size) {
603    _region = (address) region;
604    _size   = size;
605    sample(_save_buf);
606  }
607
608  bool verify() {
609    u1 check_buf[sizeof(_save_buf)];
610    int check_size = sample(check_buf);
611    return (0 == memcmp(_save_buf, check_buf, check_size));
612  }
613
614  void set_region(const void* region) { _region = (address) region; }
615};
616#endif
617
618
619// --------------------------------------------------------------------------
620StringTable* StringTable::_the_table = NULL;
621
622bool StringTable::_needs_rehashing = false;
623jint StringTable::_seed = 0;
624
625// Pick hashing algorithm
626unsigned int StringTable::hash_string(const jchar* s, int len) {
627  return use_alternate_hashcode() ? AltHashing::murmur3_32(seed(), s, len) :
628                                    java_lang_String::to_hash(s, len);
629}
630
631oop StringTable::lookup(int index, jchar* name,
632                        int len, unsigned int hash) {
633  int count = 0;
634  for (HashtableEntry<oop, mtSymbol>* l = bucket(index); l != NULL; l = l->next()) {
635    count++;
636    if (l->hash() == hash) {
637      if (java_lang_String::equals(l->literal(), name, len)) {
638        return l->literal();
639      }
640    }
641  }
642  // If the bucket size is too deep check if this hash code is insufficient.
643  if (count >= BasicHashtable<mtSymbol>::rehash_count && !needs_rehashing()) {
644    _needs_rehashing = check_rehash_table(count);
645  }
646  return NULL;
647}
648
649
650oop StringTable::basic_add(int index_arg, Handle string, jchar* name,
651                           int len, unsigned int hashValue_arg, TRAPS) {
652
653  assert(java_lang_String::equals(string(), name, len),
654         "string must be properly initialized");
655  // Cannot hit a safepoint in this function because the "this" pointer can move.
656  No_Safepoint_Verifier nsv;
657
658  // Check if the symbol table has been rehashed, if so, need to recalculate
659  // the hash value and index before second lookup.
660  unsigned int hashValue;
661  int index;
662  if (use_alternate_hashcode()) {
663    hashValue = hash_string(name, len);
664    index = hash_to_index(hashValue);
665  } else {
666    hashValue = hashValue_arg;
667    index = index_arg;
668  }
669
670  // Since look-up was done lock-free, we need to check if another
671  // thread beat us in the race to insert the symbol.
672
673  oop test = lookup(index, name, len, hashValue); // calls lookup(u1*, int)
674  if (test != NULL) {
675    // Entry already added
676    return test;
677  }
678
679  HashtableEntry<oop, mtSymbol>* entry = new_entry(hashValue, string());
680  add_entry(index, entry);
681  return string();
682}
683
684
685oop StringTable::lookup(Symbol* symbol) {
686  ResourceMark rm;
687  int length;
688  jchar* chars = symbol->as_unicode(length);
689  unsigned int hashValue = hash_string(chars, length);
690  int index = the_table()->hash_to_index(hashValue);
691  return the_table()->lookup(index, chars, length, hashValue);
692}
693
694
695oop StringTable::intern(Handle string_or_null, jchar* name,
696                        int len, TRAPS) {
697  unsigned int hashValue = hash_string(name, len);
698  int index = the_table()->hash_to_index(hashValue);
699  oop found_string = the_table()->lookup(index, name, len, hashValue);
700
701  // Found
702  if (found_string != NULL) return found_string;
703
704  debug_only(StableMemoryChecker smc(name, len * sizeof(name[0])));
705  assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(),
706         "proposed name of symbol must be stable");
707
708  Handle string;
709  // try to reuse the string if possible
710  if (!string_or_null.is_null() && (!JavaObjectsInPerm || string_or_null()->is_perm())) {
711    string = string_or_null;
712  } else {
713    string = java_lang_String::create_tenured_from_unicode(name, len, CHECK_NULL);
714  }
715
716  // Grab the StringTable_lock before getting the_table() because it could
717  // change at safepoint.
718  MutexLocker ml(StringTable_lock, THREAD);
719
720  // Otherwise, add to symbol to table
721  return the_table()->basic_add(index, string, name, len,
722                                hashValue, CHECK_NULL);
723}
724
725oop StringTable::intern(Symbol* symbol, TRAPS) {
726  if (symbol == NULL) return NULL;
727  ResourceMark rm(THREAD);
728  int length;
729  jchar* chars = symbol->as_unicode(length);
730  Handle string;
731  oop result = intern(string, chars, length, CHECK_NULL);
732  return result;
733}
734
735
736oop StringTable::intern(oop string, TRAPS)
737{
738  if (string == NULL) return NULL;
739  ResourceMark rm(THREAD);
740  int length;
741  Handle h_string (THREAD, string);
742  jchar* chars = java_lang_String::as_unicode_string(string, length);
743  oop result = intern(h_string, chars, length, CHECK_NULL);
744  return result;
745}
746
747
748oop StringTable::intern(const char* utf8_string, TRAPS) {
749  if (utf8_string == NULL) return NULL;
750  ResourceMark rm(THREAD);
751  int length = UTF8::unicode_length(utf8_string);
752  jchar* chars = NEW_RESOURCE_ARRAY(jchar, length);
753  UTF8::convert_to_unicode(utf8_string, chars, length);
754  Handle string;
755  oop result = intern(string, chars, length, CHECK_NULL);
756  return result;
757}
758
759void StringTable::unlink(BoolObjectClosure* is_alive) {
760  // Readers of the table are unlocked, so we should only be removing
761  // entries at a safepoint.
762  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
763  for (int i = 0; i < the_table()->table_size(); ++i) {
764    HashtableEntry<oop, mtSymbol>** p = the_table()->bucket_addr(i);
765    HashtableEntry<oop, mtSymbol>* entry = the_table()->bucket(i);
766    while (entry != NULL) {
767      // Shared entries are normally at the end of the bucket and if we run into
768      // a shared entry, then there is nothing more to remove. However, if we
769      // have rehashed the table, then the shared entries are no longer at the
770      // end of the bucket.
771      if (entry->is_shared() && !use_alternate_hashcode()) {
772        break;
773      }
774      assert(entry->literal() != NULL, "just checking");
775      if (entry->is_shared() || is_alive->do_object_b(entry->literal())) {
776        p = entry->next_addr();
777      } else {
778        *p = entry->next();
779        the_table()->free_entry(entry);
780      }
781      entry = (HashtableEntry<oop, mtSymbol>*)HashtableEntry<oop, mtSymbol>::make_ptr(*p);
782    }
783  }
784}
785
786void StringTable::oops_do(OopClosure* f) {
787  for (int i = 0; i < the_table()->table_size(); ++i) {
788    HashtableEntry<oop, mtSymbol>** p = the_table()->bucket_addr(i);
789    HashtableEntry<oop, mtSymbol>* entry = the_table()->bucket(i);
790    while (entry != NULL) {
791      f->do_oop((oop*)entry->literal_addr());
792
793      // Did the closure remove the literal from the table?
794      if (entry->literal() == NULL) {
795        assert(!entry->is_shared(), "immutable hashtable entry?");
796        *p = entry->next();
797        the_table()->free_entry(entry);
798      } else {
799        p = entry->next_addr();
800      }
801      entry = (HashtableEntry<oop, mtSymbol>*)HashtableEntry<oop, mtSymbol>::make_ptr(*p);
802    }
803  }
804}
805
806void StringTable::verify() {
807  for (int i = 0; i < the_table()->table_size(); ++i) {
808    HashtableEntry<oop, mtSymbol>* p = the_table()->bucket(i);
809    for ( ; p != NULL; p = p->next()) {
810      oop s = p->literal();
811      guarantee(s != NULL, "interned string is NULL");
812      guarantee(s->is_perm() || !JavaObjectsInPerm, "interned string not in permspace");
813      unsigned int h = java_lang_String::hash_string(s);
814      guarantee(p->hash() == h, "broken hash in string table entry");
815      guarantee(the_table()->hash_to_index(h) == i,
816                "wrong index in string table");
817    }
818  }
819}
820
821void StringTable::dump(outputStream* st) {
822  NumberSeq summary;
823  for (int i = 0; i < the_table()->table_size(); ++i) {
824    HashtableEntry<oop, mtSymbol>* p = the_table()->bucket(i);
825    int count = 0;
826    for ( ; p != NULL; p = p->next()) {
827      count++;
828    }
829    summary.add((double)count);
830  }
831  st->print_cr("StringTable statistics:");
832  st->print_cr("Number of buckets       : %7d", summary.num());
833  st->print_cr("Average bucket size     : %7.0f", summary.avg());
834  st->print_cr("Variance of bucket size : %7.0f", summary.variance());
835  st->print_cr("Std. dev. of bucket size: %7.0f", summary.sd());
836  st->print_cr("Maximum bucket size     : %7.0f", summary.maximum());
837}
838
839
840unsigned int StringTable::new_hash(oop string) {
841  ResourceMark rm;
842  int length;
843  jchar* chars = java_lang_String::as_unicode_string(string, length);
844  // Use alternate hashing algorithm on the string
845  return AltHashing::murmur3_32(seed(), chars, length);
846}
847
848// Create a new table and using alternate hash code, populate the new table
849// with the existing strings.   Set flag to use the alternate hash code afterwards.
850void StringTable::rehash_table() {
851  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
852  // This should never happen with -Xshare:dump but it might in testing mode.
853  if (DumpSharedSpaces) return;
854  StringTable* new_table = new StringTable();
855
856  // Initialize new global seed for hashing.
857  _seed = AltHashing::compute_seed();
858  assert(seed() != 0, "shouldn't be zero");
859
860  // Rehash the table
861  the_table()->move_to(new_table);
862
863  // Delete the table and buckets (entries are reused in new table).
864  delete _the_table;
865  // Don't check if we need rehashing until the table gets unbalanced again.
866  // Then rehash with a new global seed.
867  _needs_rehashing = false;
868  _the_table = new_table;
869}
870