// symbolTable.cpp revision 2062:3582bf76420e
1/* 2 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25#include "precompiled.hpp" 26#include "classfile/javaClasses.hpp" 27#include "classfile/symbolTable.hpp" 28#include "classfile/systemDictionary.hpp" 29#include "gc_interface/collectedHeap.inline.hpp" 30#include "memory/filemap.hpp" 31#include "memory/gcLocker.inline.hpp" 32#include "oops/oop.inline.hpp" 33#include "oops/oop.inline2.hpp" 34#include "runtime/mutexLocker.hpp" 35#include "utilities/hashtable.inline.hpp" 36 37// -------------------------------------------------------------------------- 38 39SymbolTable* SymbolTable::_the_table = NULL; 40 41Symbol* SymbolTable::allocate_symbol(const u1* name, int len, TRAPS) { 42 // Don't allow symbols to be created which cannot fit in a Symbol*. 
43 if (len > Symbol::max_length()) { 44 THROW_MSG_0(vmSymbols::java_lang_InternalError(), 45 "name is too long to represent"); 46 } 47 Symbol* sym = new (len) Symbol(name, len); 48 assert(sym != NULL, "new should call vm_exit_out_of_memory if C_HEAP is exhausted"); 49 return sym; 50} 51 52bool SymbolTable::allocate_symbols(int names_count, const u1** names, 53 int* lengths, Symbol** syms, TRAPS) { 54 for (int i = 0; i< names_count; i++) { 55 if (lengths[i] > Symbol::max_length()) { 56 THROW_MSG_0(vmSymbols::java_lang_InternalError(), 57 "name is too long to represent"); 58 } 59 } 60 61 for (int i = 0; i< names_count; i++) { 62 int len = lengths[i]; 63 syms[i] = new (len) Symbol(names[i], len); 64 assert(syms[i] != NULL, "new should call vm_exit_out_of_memory if " 65 "C_HEAP is exhausted"); 66 } 67 return true; 68} 69 70// Call function for all symbols in the symbol table. 71void SymbolTable::symbols_do(SymbolClosure *cl) { 72 const int n = the_table()->table_size(); 73 for (int i = 0; i < n; i++) { 74 for (HashtableEntry<Symbol*>* p = the_table()->bucket(i); 75 p != NULL; 76 p = p->next()) { 77 cl->do_symbol(p->literal_addr()); 78 } 79 } 80} 81 82int SymbolTable::symbols_removed = 0; 83int SymbolTable::symbols_counted = 0; 84 85// Remove unreferenced symbols from the symbol table 86// This is done late during GC. This doesn't use the hash table unlink because 87// it assumes that the literals are oops. 88void SymbolTable::unlink() { 89 int removed = 0; 90 int total = 0; 91 int memory_total = 0; 92 for (int i = 0; i < the_table()->table_size(); ++i) { 93 for (HashtableEntry<Symbol*>** p = the_table()->bucket_addr(i); *p != NULL; ) { 94 HashtableEntry<Symbol*>* entry = *p; 95 if (entry->is_shared()) { 96 break; 97 } 98 Symbol* s = entry->literal(); 99 memory_total += s->object_size(); 100 total++; 101 assert(s != NULL, "just checking"); 102 // If reference count is zero, remove. 
103 if (s->refcount() == 0) { 104 delete s; 105 removed++; 106 *p = entry->next(); 107 the_table()->free_entry(entry); 108 } else { 109 p = entry->next_addr(); 110 } 111 } 112 } 113 symbols_removed += removed; 114 symbols_counted += total; 115 if (PrintGCDetails) { 116 gclog_or_tty->print(" [Symbols=%d size=%dK] ", total, 117 (memory_total*HeapWordSize)/1024); 118 } 119} 120 121 122// Lookup a symbol in a bucket. 123 124Symbol* SymbolTable::lookup(int index, const char* name, 125 int len, unsigned int hash) { 126 for (HashtableEntry<Symbol*>* e = bucket(index); e != NULL; e = e->next()) { 127 if (e->hash() == hash) { 128 Symbol* sym = e->literal(); 129 if (sym->equals(name, len)) { 130 // something is referencing this symbol now. 131 sym->increment_refcount(); 132 return sym; 133 } 134 } 135 } 136 return NULL; 137} 138 139 140// We take care not to be blocking while holding the 141// SymbolTable_lock. Otherwise, the system might deadlock, since the 142// symboltable is used during compilation (VM_thread) The lock free 143// synchronization is simplified by the fact that we do not delete 144// entries in the symbol table during normal execution (only during 145// safepoints). 
146 147Symbol* SymbolTable::lookup(const char* name, int len, TRAPS) { 148 unsigned int hashValue = hash_symbol(name, len); 149 int index = the_table()->hash_to_index(hashValue); 150 151 Symbol* s = the_table()->lookup(index, name, len, hashValue); 152 153 // Found 154 if (s != NULL) return s; 155 156 // Otherwise, add to symbol to table 157 return the_table()->basic_add(index, (u1*)name, len, hashValue, CHECK_NULL); 158} 159 160Symbol* SymbolTable::lookup(const Symbol* sym, int begin, int end, TRAPS) { 161 char* buffer; 162 int index, len; 163 unsigned int hashValue; 164 char* name; 165 { 166 debug_only(No_Safepoint_Verifier nsv;) 167 168 name = (char*)sym->base() + begin; 169 len = end - begin; 170 hashValue = hash_symbol(name, len); 171 index = the_table()->hash_to_index(hashValue); 172 Symbol* s = the_table()->lookup(index, name, len, hashValue); 173 174 // Found 175 if (s != NULL) return s; 176 } 177 178 // Otherwise, add to symbol to table. Copy to a C string first. 179 char stack_buf[128]; 180 ResourceMark rm(THREAD); 181 if (len <= 128) { 182 buffer = stack_buf; 183 } else { 184 buffer = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, char, len); 185 } 186 for (int i=0; i<len; i++) { 187 buffer[i] = name[i]; 188 } 189 // Make sure there is no safepoint in the code above since name can't move. 190 // We can't include the code in No_Safepoint_Verifier because of the 191 // ResourceMark. 192 193 return the_table()->basic_add(index, (u1*)buffer, len, hashValue, CHECK_NULL); 194} 195 196Symbol* SymbolTable::lookup_only(const char* name, int len, 197 unsigned int& hash) { 198 hash = hash_symbol(name, len); 199 int index = the_table()->hash_to_index(hash); 200 201 Symbol* s = the_table()->lookup(index, name, len, hash); 202 return s; 203} 204 205// Suggestion: Push unicode-based lookup all the way into the hashing 206// and probing logic, so there is no need for convert_to_utf8 until 207// an actual new Symbol* is created. 
208Symbol* SymbolTable::lookup_unicode(const jchar* name, int utf16_length, TRAPS) { 209 int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length); 210 char stack_buf[128]; 211 if (utf8_length < (int) sizeof(stack_buf)) { 212 char* chars = stack_buf; 213 UNICODE::convert_to_utf8(name, utf16_length, chars); 214 return lookup(chars, utf8_length, THREAD); 215 } else { 216 ResourceMark rm(THREAD); 217 char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);; 218 UNICODE::convert_to_utf8(name, utf16_length, chars); 219 return lookup(chars, utf8_length, THREAD); 220 } 221} 222 223Symbol* SymbolTable::lookup_only_unicode(const jchar* name, int utf16_length, 224 unsigned int& hash) { 225 int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length); 226 char stack_buf[128]; 227 if (utf8_length < (int) sizeof(stack_buf)) { 228 char* chars = stack_buf; 229 UNICODE::convert_to_utf8(name, utf16_length, chars); 230 return lookup_only(chars, utf8_length, hash); 231 } else { 232 ResourceMark rm; 233 char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);; 234 UNICODE::convert_to_utf8(name, utf16_length, chars); 235 return lookup_only(chars, utf8_length, hash); 236 } 237} 238 239void SymbolTable::add(constantPoolHandle cp, int names_count, 240 const char** names, int* lengths, int* cp_indices, 241 unsigned int* hashValues, TRAPS) { 242 SymbolTable* table = the_table(); 243 bool added = table->basic_add(cp, names_count, names, lengths, 244 cp_indices, hashValues, CHECK); 245 if (!added) { 246 // do it the hard way 247 for (int i=0; i<names_count; i++) { 248 int index = table->hash_to_index(hashValues[i]); 249 Symbol* sym = table->basic_add(index, (u1*)names[i], lengths[i], 250 hashValues[i], CHECK); 251 cp->symbol_at_put(cp_indices[i], sym); 252 } 253 } 254} 255 256Symbol* SymbolTable::basic_add(int index, u1 *name, int len, 257 unsigned int hashValue, TRAPS) { 258 assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(), 259 "proposed name of 
symbol must be stable"); 260 261 // We assume that lookup() has been called already, that it failed, 262 // and symbol was not found. We create the symbol here. 263 Symbol* sym = allocate_symbol(name, len, CHECK_NULL); 264 265 // Allocation must be done before grabbing the SymbolTable_lock lock 266 MutexLocker ml(SymbolTable_lock, THREAD); 267 268 assert(sym->equals((char*)name, len), "symbol must be properly initialized"); 269 270 // Since look-up was done lock-free, we need to check if another 271 // thread beat us in the race to insert the symbol. 272 273 Symbol* test = lookup(index, (char*)name, len, hashValue); 274 if (test != NULL) { 275 // A race occurred and another thread introduced the symbol, this one 276 // will be dropped and collected. 277 delete sym; 278 assert(test->refcount() != 0, "lookup should have incremented the count"); 279 return test; 280 } 281 282 HashtableEntry<Symbol*>* entry = new_entry(hashValue, sym); 283 sym->increment_refcount(); 284 add_entry(index, entry); 285 return sym; 286} 287 288bool SymbolTable::basic_add(constantPoolHandle cp, int names_count, 289 const char** names, int* lengths, 290 int* cp_indices, unsigned int* hashValues, 291 TRAPS) { 292 Symbol* syms[symbol_alloc_batch_size]; 293 bool allocated = allocate_symbols(names_count, (const u1**)names, lengths, 294 syms, CHECK_false); 295 if (!allocated) { 296 return false; 297 } 298 299 // Allocation must be done before grabbing the SymbolTable_lock lock 300 MutexLocker ml(SymbolTable_lock, THREAD); 301 302 for (int i=0; i<names_count; i++) { 303 assert(syms[i]->equals(names[i], lengths[i]), "symbol must be properly initialized"); 304 // Since look-up was done lock-free, we need to check if another 305 // thread beat us in the race to insert the symbol. 
306 int index = hash_to_index(hashValues[i]); 307 Symbol* test = lookup(index, names[i], lengths[i], hashValues[i]); 308 if (test != NULL) { 309 // A race occurred and another thread introduced the symbol, this one 310 // will be dropped and collected. Use test instead. 311 cp->symbol_at_put(cp_indices[i], test); 312 assert(test->refcount() != 0, "lookup should have incremented the count"); 313 delete syms[i]; 314 } else { 315 Symbol* sym = syms[i]; 316 HashtableEntry<Symbol*>* entry = new_entry(hashValues[i], sym); 317 sym->increment_refcount(); // increment refcount in external hashtable 318 add_entry(index, entry); 319 cp->symbol_at_put(cp_indices[i], sym); 320 } 321 } 322 323 return true; 324} 325 326 327void SymbolTable::verify() { 328 for (int i = 0; i < the_table()->table_size(); ++i) { 329 HashtableEntry<Symbol*>* p = the_table()->bucket(i); 330 for ( ; p != NULL; p = p->next()) { 331 Symbol* s = (Symbol*)(p->literal()); 332 guarantee(s != NULL, "symbol is NULL"); 333 unsigned int h = hash_symbol((char*)s->bytes(), s->utf8_length()); 334 guarantee(p->hash() == h, "broken hash in symbol table entry"); 335 guarantee(the_table()->hash_to_index(h) == i, 336 "wrong index in symbol table"); 337 } 338 } 339} 340 341 342//--------------------------------------------------------------------------- 343// Non-product code 344 345#ifndef PRODUCT 346 347void SymbolTable::print_histogram() { 348 MutexLocker ml(SymbolTable_lock); 349 const int results_length = 100; 350 int results[results_length]; 351 int i,j; 352 353 // initialize results to zero 354 for (j = 0; j < results_length; j++) { 355 results[j] = 0; 356 } 357 358 int total = 0; 359 int max_symbols = 0; 360 int out_of_range = 0; 361 int memory_total = 0; 362 int count = 0; 363 for (i = 0; i < the_table()->table_size(); i++) { 364 HashtableEntry<Symbol*>* p = the_table()->bucket(i); 365 for ( ; p != NULL; p = p->next()) { 366 memory_total += p->literal()->object_size(); 367 count++; 368 int counter = 
p->literal()->utf8_length(); 369 total += counter; 370 if (counter < results_length) { 371 results[counter]++; 372 } else { 373 out_of_range++; 374 } 375 max_symbols = MAX2(max_symbols, counter); 376 } 377 } 378 tty->print_cr("Symbol Table:"); 379 tty->print_cr("Total number of symbols %5d", count); 380 tty->print_cr("Total size in memory %5dK", 381 (memory_total*HeapWordSize)/1024); 382 tty->print_cr("Total counted %5d", symbols_counted); 383 tty->print_cr("Total removed %5d", symbols_removed); 384 if (symbols_counted > 0) { 385 tty->print_cr("Percent removed %3.2f", 386 ((float)symbols_removed/(float)symbols_counted)* 100); 387 } 388 tty->print_cr("Reference counts %5d", Symbol::_total_count); 389 tty->print_cr("Histogram of symbol length:"); 390 tty->print_cr("%8s %5d", "Total ", total); 391 tty->print_cr("%8s %5d", "Maximum", max_symbols); 392 tty->print_cr("%8s %3.2f", "Average", 393 ((float) total / (float) the_table()->table_size())); 394 tty->print_cr("%s", "Histogram:"); 395 tty->print_cr(" %s %29s", "Length", "Number chains that length"); 396 for (i = 0; i < results_length; i++) { 397 if (results[i] > 0) { 398 tty->print_cr("%6d %10d", i, results[i]); 399 } 400 } 401 if (Verbose) { 402 int line_length = 70; 403 tty->print_cr("%s %30s", " Length", "Number chains that length"); 404 for (i = 0; i < results_length; i++) { 405 if (results[i] > 0) { 406 tty->print("%4d", i); 407 for (j = 0; (j < results[i]) && (j < line_length); j++) { 408 tty->print("%1s", "*"); 409 } 410 if (j == line_length) { 411 tty->print("%1s", "+"); 412 } 413 tty->cr(); 414 } 415 } 416 } 417 tty->print_cr(" %s %d: %d\n", "Number chains longer than", 418 results_length, out_of_range); 419} 420 421void SymbolTable::print() { 422 for (int i = 0; i < the_table()->table_size(); ++i) { 423 HashtableEntry<Symbol*>** p = the_table()->bucket_addr(i); 424 HashtableEntry<Symbol*>* entry = the_table()->bucket(i); 425 if (entry != NULL) { 426 while (entry != NULL) { 427 tty->print(PTR_FORMAT " ", 
entry->literal()); 428 entry->literal()->print(); 429 tty->print(" %d", entry->literal()->refcount()); 430 p = entry->next_addr(); 431 entry = (HashtableEntry<Symbol*>*)HashtableEntry<Symbol*>::make_ptr(*p); 432 } 433 tty->cr(); 434 } 435 } 436} 437 438#endif // PRODUCT 439 440// -------------------------------------------------------------------------- 441 442#ifdef ASSERT 443class StableMemoryChecker : public StackObj { 444 enum { _bufsize = wordSize*4 }; 445 446 address _region; 447 jint _size; 448 u1 _save_buf[_bufsize]; 449 450 int sample(u1* save_buf) { 451 if (_size <= _bufsize) { 452 memcpy(save_buf, _region, _size); 453 return _size; 454 } else { 455 // copy head and tail 456 memcpy(&save_buf[0], _region, _bufsize/2); 457 memcpy(&save_buf[_bufsize/2], _region + _size - _bufsize/2, _bufsize/2); 458 return (_bufsize/2)*2; 459 } 460 } 461 462 public: 463 StableMemoryChecker(const void* region, jint size) { 464 _region = (address) region; 465 _size = size; 466 sample(_save_buf); 467 } 468 469 bool verify() { 470 u1 check_buf[sizeof(_save_buf)]; 471 int check_size = sample(check_buf); 472 return (0 == memcmp(_save_buf, check_buf, check_size)); 473 } 474 475 void set_region(const void* region) { _region = (address) region; } 476}; 477#endif 478 479 480// -------------------------------------------------------------------------- 481 482 483// Compute the hash value for a java.lang.String object which would 484// contain the characters passed in. This hash value is used for at 485// least two purposes. 486// 487// (a) As the hash value used by the StringTable for bucket selection 488// and comparison (stored in the HashtableEntry structures). This 489// is used in the String.intern() method. 490// 491// (b) As the hash value used by the String object itself, in 492// String.hashCode(). This value is normally calculate in Java code 493// in the String.hashCode method(), but is precomputed for String 494// objects in the shared archive file. 
495// 496// For this reason, THIS ALGORITHM MUST MATCH String.hashCode(). 497 498int StringTable::hash_string(jchar* s, int len) { 499 unsigned h = 0; 500 while (len-- > 0) { 501 h = 31*h + (unsigned) *s; 502 s++; 503 } 504 return h; 505} 506 507 508StringTable* StringTable::_the_table = NULL; 509 510oop StringTable::lookup(int index, jchar* name, 511 int len, unsigned int hash) { 512 for (HashtableEntry<oop>* l = bucket(index); l != NULL; l = l->next()) { 513 if (l->hash() == hash) { 514 if (java_lang_String::equals(l->literal(), name, len)) { 515 return l->literal(); 516 } 517 } 518 } 519 return NULL; 520} 521 522 523oop StringTable::basic_add(int index, Handle string_or_null, jchar* name, 524 int len, unsigned int hashValue, TRAPS) { 525 debug_only(StableMemoryChecker smc(name, len * sizeof(name[0]))); 526 assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(), 527 "proposed name of symbol must be stable"); 528 529 Handle string; 530 // try to reuse the string if possible 531 if (!string_or_null.is_null() && string_or_null()->is_perm()) { 532 string = string_or_null; 533 } else { 534 string = java_lang_String::create_tenured_from_unicode(name, len, CHECK_NULL); 535 } 536 537 // Allocation must be done before grapping the SymbolTable_lock lock 538 MutexLocker ml(StringTable_lock, THREAD); 539 540 assert(java_lang_String::equals(string(), name, len), 541 "string must be properly initialized"); 542 543 // Since look-up was done lock-free, we need to check if another 544 // thread beat us in the race to insert the symbol. 
545 546 oop test = lookup(index, name, len, hashValue); // calls lookup(u1*, int) 547 if (test != NULL) { 548 // Entry already added 549 return test; 550 } 551 552 HashtableEntry<oop>* entry = new_entry(hashValue, string()); 553 add_entry(index, entry); 554 return string(); 555} 556 557 558oop StringTable::lookup(Symbol* symbol) { 559 ResourceMark rm; 560 int length; 561 jchar* chars = symbol->as_unicode(length); 562 unsigned int hashValue = hash_string(chars, length); 563 int index = the_table()->hash_to_index(hashValue); 564 return the_table()->lookup(index, chars, length, hashValue); 565} 566 567 568oop StringTable::intern(Handle string_or_null, jchar* name, 569 int len, TRAPS) { 570 unsigned int hashValue = hash_string(name, len); 571 int index = the_table()->hash_to_index(hashValue); 572 oop string = the_table()->lookup(index, name, len, hashValue); 573 574 // Found 575 if (string != NULL) return string; 576 577 // Otherwise, add to symbol to table 578 return the_table()->basic_add(index, string_or_null, name, len, 579 hashValue, CHECK_NULL); 580} 581 582oop StringTable::intern(Symbol* symbol, TRAPS) { 583 if (symbol == NULL) return NULL; 584 ResourceMark rm(THREAD); 585 int length; 586 jchar* chars = symbol->as_unicode(length); 587 Handle string; 588 oop result = intern(string, chars, length, CHECK_NULL); 589 return result; 590} 591 592 593oop StringTable::intern(oop string, TRAPS) 594{ 595 if (string == NULL) return NULL; 596 ResourceMark rm(THREAD); 597 int length; 598 Handle h_string (THREAD, string); 599 jchar* chars = java_lang_String::as_unicode_string(string, length); 600 oop result = intern(h_string, chars, length, CHECK_NULL); 601 return result; 602} 603 604 605oop StringTable::intern(const char* utf8_string, TRAPS) { 606 if (utf8_string == NULL) return NULL; 607 ResourceMark rm(THREAD); 608 int length = UTF8::unicode_length(utf8_string); 609 jchar* chars = NEW_RESOURCE_ARRAY(jchar, length); 610 UTF8::convert_to_unicode(utf8_string, chars, length); 
611 Handle string; 612 oop result = intern(string, chars, length, CHECK_NULL); 613 return result; 614} 615 616void StringTable::unlink(BoolObjectClosure* is_alive) { 617 // Readers of the table are unlocked, so we should only be removing 618 // entries at a safepoint. 619 assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); 620 for (int i = 0; i < the_table()->table_size(); ++i) { 621 for (HashtableEntry<oop>** p = the_table()->bucket_addr(i); *p != NULL; ) { 622 HashtableEntry<oop>* entry = *p; 623 if (entry->is_shared()) { 624 break; 625 } 626 assert(entry->literal() != NULL, "just checking"); 627 if (is_alive->do_object_b(entry->literal())) { 628 p = entry->next_addr(); 629 } else { 630 *p = entry->next(); 631 the_table()->free_entry(entry); 632 } 633 } 634 } 635} 636 637void StringTable::oops_do(OopClosure* f) { 638 for (int i = 0; i < the_table()->table_size(); ++i) { 639 HashtableEntry<oop>** p = the_table()->bucket_addr(i); 640 HashtableEntry<oop>* entry = the_table()->bucket(i); 641 while (entry != NULL) { 642 f->do_oop((oop*)entry->literal_addr()); 643 644 // Did the closure remove the literal from the table? 
645 if (entry->literal() == NULL) { 646 assert(!entry->is_shared(), "immutable hashtable entry?"); 647 *p = entry->next(); 648 the_table()->free_entry(entry); 649 } else { 650 p = entry->next_addr(); 651 } 652 entry = (HashtableEntry<oop>*)HashtableEntry<oop>::make_ptr(*p); 653 } 654 } 655} 656 657void StringTable::verify() { 658 for (int i = 0; i < the_table()->table_size(); ++i) { 659 HashtableEntry<oop>* p = the_table()->bucket(i); 660 for ( ; p != NULL; p = p->next()) { 661 oop s = p->literal(); 662 guarantee(s != NULL, "interned string is NULL"); 663 guarantee(s->is_perm(), "interned string not in permspace"); 664 665 int length; 666 jchar* chars = java_lang_String::as_unicode_string(s, length); 667 unsigned int h = hash_string(chars, length); 668 guarantee(p->hash() == h, "broken hash in string table entry"); 669 guarantee(the_table()->hash_to_index(h) == i, 670 "wrong index in string table"); 671 } 672 } 673} 674