1/*
2 * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "classfile/altHashing.hpp"
27#include "classfile/javaClasses.inline.hpp"
28#include "gc/g1/g1CollectedHeap.inline.hpp"
29#include "gc/g1/g1SATBCardTableModRefBS.hpp"
30#include "gc/g1/g1StringDedup.hpp"
31#include "gc/g1/g1StringDedupTable.hpp"
32#include "gc/shared/gcLocker.hpp"
33#include "logging/log.hpp"
34#include "memory/padded.inline.hpp"
35#include "oops/oop.inline.hpp"
36#include "oops/typeArrayOop.hpp"
37#include "runtime/mutexLocker.hpp"
38
39//
40// List of deduplication table entries. Links table
41// entries together using their _next fields.
42//
43class G1StringDedupEntryList : public CHeapObj<mtGC> {
44private:
45  G1StringDedupEntry* _list;
46  size_t              _length;
47
48public:
49  G1StringDedupEntryList() :
50    _list(NULL),
51    _length(0) {
52  }
53
54  void add(G1StringDedupEntry* entry) {
55    entry->set_next(_list);
56    _list = entry;
57    _length++;
58  }
59
60  G1StringDedupEntry* remove() {
61    G1StringDedupEntry* entry = _list;
62    if (entry != NULL) {
63      _list = entry->next();
64      _length--;
65    }
66    return entry;
67  }
68
69  G1StringDedupEntry* remove_all() {
70    G1StringDedupEntry* list = _list;
71    _list = NULL;
72    return list;
73  }
74
75  size_t length() {
76    return _length;
77  }
78};
79
80//
81// Cache of deduplication table entries. This cache provides fast allocation and
82// reuse of table entries to lower the pressure on the underlying allocator.
83// But more importantly, it provides fast/deferred freeing of table entries. This
84// is important because freeing of table entries is done during stop-the-world
// phases and it is not uncommon for a large number of entries to be freed at once.
// Table entries that are freed during these phases are placed onto a freelist in
87// the cache. The deduplication thread, which executes in a concurrent phase, will
88// later reuse or free the underlying memory for these entries.
89//
90// The cache allows for single-threaded allocations and multi-threaded frees.
91// Allocations are synchronized by StringDedupTable_lock as part of a table
92// modification.
93//
class G1StringDedupEntryCache : public CHeapObj<mtGC> {
private:
  // One cache/overflow list per GC worker to allow lock less freeing of
  // entries while doing a parallel scan of the table. Using PaddedEnd to
  // avoid false sharing.
  //
  // _nlists is the number of lists in each of the two arrays below, and
  // _max_list_length is the per-list limit applied to _cached (the total
  // limit passed to set_max_size() divided by _nlists). Entries in _cached
  // are handed out again by alloc(), entries in _overflowed are waiting to
  // be deleted by delete_overflowed().
  size_t                             _nlists;
  size_t                             _max_list_length;
  PaddedEnd<G1StringDedupEntryList>* _cached;
  PaddedEnd<G1StringDedupEntryList>* _overflowed;

public:
  // Creates a cache that keeps at most max_size entries around for reuse.
  G1StringDedupEntryCache(size_t max_size);
  // Not expected to be called, the implementation is ShouldNotReachHere().
  ~G1StringDedupEntryCache();

  // Set max number of table entries to cache.
  void set_max_size(size_t max_size);

  // Get a table entry from the cache, or allocate a new entry if the cache is empty.
  G1StringDedupEntry* alloc();

  // Insert a table entry into the cache. The entry goes onto the list that
  // belongs to worker_id, or onto that worker's overflow list if the cached
  // list is full.
  void free(G1StringDedupEntry* entry, uint worker_id);

  // Returns current number of entries in the cache. Entries on the overflow
  // lists are not included.
  size_t size();

  // Deletes overflowed entries.
  void delete_overflowed();
};
123
124G1StringDedupEntryCache::G1StringDedupEntryCache(size_t max_size) :
125  _nlists(ParallelGCThreads),
126  _max_list_length(0),
127  _cached(PaddedArray<G1StringDedupEntryList, mtGC>::create_unfreeable((uint)_nlists)),
128  _overflowed(PaddedArray<G1StringDedupEntryList, mtGC>::create_unfreeable((uint)_nlists)) {
129  set_max_size(max_size);
130}
131
// The cache is not expected to be destroyed (its lists are created with
// create_unfreeable()), so any call of the destructor is reported through
// ShouldNotReachHere().
G1StringDedupEntryCache::~G1StringDedupEntryCache() {
  ShouldNotReachHere();
}
135
136void G1StringDedupEntryCache::set_max_size(size_t size) {
137  _max_list_length = size / _nlists;
138}
139
140G1StringDedupEntry* G1StringDedupEntryCache::alloc() {
141  for (size_t i = 0; i < _nlists; i++) {
142    G1StringDedupEntry* entry = _cached[i].remove();
143    if (entry != NULL) {
144      return entry;
145    }
146  }
147  return new G1StringDedupEntry();
148}
149
150void G1StringDedupEntryCache::free(G1StringDedupEntry* entry, uint worker_id) {
151  assert(entry->obj() != NULL, "Double free");
152  assert(worker_id < _nlists, "Invalid worker id");
153
154  entry->set_obj(NULL);
155  entry->set_hash(0);
156
157  if (_cached[worker_id].length() < _max_list_length) {
158    // Cache is not full
159    _cached[worker_id].add(entry);
160  } else {
161    // Cache is full, add to overflow list for later deletion
162    _overflowed[worker_id].add(entry);
163  }
164}
165
166size_t G1StringDedupEntryCache::size() {
167  size_t size = 0;
168  for (size_t i = 0; i < _nlists; i++) {
169    size += _cached[i].length();
170  }
171  return size;
172}
173
174void G1StringDedupEntryCache::delete_overflowed() {
175  double start = os::elapsedTime();
176  uintx count = 0;
177
178  for (size_t i = 0; i < _nlists; i++) {
179    G1StringDedupEntry* entry;
180
181    {
182      // The overflow list can be modified during safepoints, therefore
183      // we temporarily join the suspendible thread set while removing
184      // all entries from the list.
185      SuspendibleThreadSetJoiner sts_join;
186      entry = _overflowed[i].remove_all();
187    }
188
189    // Delete all entries
190    while (entry != NULL) {
191      G1StringDedupEntry* next = entry->next();
192      delete entry;
193      entry = next;
194      count++;
195    }
196  }
197
198  double end = os::elapsedTime();
199  log_trace(gc, stringdedup)("Deleted " UINTX_FORMAT " entries, " G1_STRDEDUP_TIME_FORMAT_MS,
200                             count, G1_STRDEDUP_TIME_PARAM_MS(end - start));
201}
202
// The single table instance and the entry cache shared by all of its
// incarnations. Both are set up by create().
G1StringDedupTable*      G1StringDedupTable::_table = NULL;
G1StringDedupEntryCache* G1StringDedupTable::_entry_cache = NULL;

// Sizing and rehashing policy. Sizes are numbers of buckets, the load
// factors are multiplied with the table size to get the entry count
// thresholds used by prepare_resize(). A lookup that visits more than
// _rehash_threshold entries in a bucket requests a rehash.
const size_t             G1StringDedupTable::_min_size = (1 << 10);   // 1024
const size_t             G1StringDedupTable::_max_size = (1 << 24);   // 16777216
const double             G1StringDedupTable::_grow_load_factor = 2.0; // Grow table at 200% load
const double             G1StringDedupTable::_shrink_load_factor = _grow_load_factor / 3.0; // Shrink table at 67% load
const double             G1StringDedupTable::_max_cache_factor = 0.1; // Cache a maximum of 10% of the table size
const uintx              G1StringDedupTable::_rehash_multiple = 60;   // Hash bucket has 60 times more collisions than expected
const uintx              G1StringDedupTable::_rehash_threshold = (uintx)(_rehash_multiple * _grow_load_factor);

// Statistics counters, reported by print_statistics().
uintx                    G1StringDedupTable::_entries_added = 0;
uintx                    G1StringDedupTable::_entries_removed = 0;
uintx                    G1StringDedupTable::_resize_count = 0;
uintx                    G1StringDedupTable::_rehash_count = 0;
218
219G1StringDedupTable::G1StringDedupTable(size_t size, jint hash_seed) :
220  _size(size),
221  _entries(0),
222  _grow_threshold((uintx)(size * _grow_load_factor)),
223  _shrink_threshold((uintx)(size * _shrink_load_factor)),
224  _rehash_needed(false),
225  _hash_seed(hash_seed) {
226  assert(is_power_of_2(size), "Table size must be a power of 2");
227  _buckets = NEW_C_HEAP_ARRAY(G1StringDedupEntry*, _size, mtGC);
228  memset(_buckets, 0, _size * sizeof(G1StringDedupEntry*));
229}
230
// Releases the bucket array. Entries that are still linked from the buckets
// are not freed here.
G1StringDedupTable::~G1StringDedupTable() {
  FREE_C_HEAP_ARRAY(G1StringDedupEntry*, _buckets);
}
234
235void G1StringDedupTable::create() {
236  assert(_table == NULL, "One string deduplication table allowed");
237  _entry_cache = new G1StringDedupEntryCache(_min_size * _max_cache_factor);
238  _table = new G1StringDedupTable(_min_size);
239}
240
241void G1StringDedupTable::add(typeArrayOop value, bool latin1, unsigned int hash, G1StringDedupEntry** list) {
242  G1StringDedupEntry* entry = _entry_cache->alloc();
243  entry->set_obj(value);
244  entry->set_hash(hash);
245  entry->set_latin1(latin1);
246  entry->set_next(*list);
247  *list = entry;
248  _entries++;
249}
250
251void G1StringDedupTable::remove(G1StringDedupEntry** pentry, uint worker_id) {
252  G1StringDedupEntry* entry = *pentry;
253  *pentry = entry->next();
254  _entry_cache->free(entry, worker_id);
255}
256
257void G1StringDedupTable::transfer(G1StringDedupEntry** pentry, G1StringDedupTable* dest) {
258  G1StringDedupEntry* entry = *pentry;
259  *pentry = entry->next();
260  unsigned int hash = entry->hash();
261  size_t index = dest->hash_to_index(hash);
262  G1StringDedupEntry** list = dest->bucket(index);
263  entry->set_next(*list);
264  *list = entry;
265}
266
267bool G1StringDedupTable::equals(typeArrayOop value1, typeArrayOop value2) {
268  return (value1 == value2 ||
269          (value1->length() == value2->length() &&
270           (!memcmp(value1->base(T_BYTE),
271                    value2->base(T_BYTE),
272                    value1->length() * sizeof(jbyte)))));
273}
274
275typeArrayOop G1StringDedupTable::lookup(typeArrayOop value, bool latin1, unsigned int hash,
276                                        G1StringDedupEntry** list, uintx &count) {
277  for (G1StringDedupEntry* entry = *list; entry != NULL; entry = entry->next()) {
278    if (entry->hash() == hash && entry->latin1() == latin1) {
279      typeArrayOop existing_value = entry->obj();
280      if (equals(value, existing_value)) {
281        // Match found
282        return existing_value;
283      }
284    }
285    count++;
286  }
287
288  // Not found
289  return NULL;
290}
291
292typeArrayOop G1StringDedupTable::lookup_or_add_inner(typeArrayOop value, bool latin1, unsigned int hash) {
293  size_t index = hash_to_index(hash);
294  G1StringDedupEntry** list = bucket(index);
295  uintx count = 0;
296
297  // Lookup in list
298  typeArrayOop existing_value = lookup(value, latin1, hash, list, count);
299
300  // Check if rehash is needed
301  if (count > _rehash_threshold) {
302    _rehash_needed = true;
303  }
304
305  if (existing_value == NULL) {
306    // Not found, add new entry
307    add(value, latin1, hash, list);
308
309    // Update statistics
310    _entries_added++;
311  }
312
313  return existing_value;
314}
315
316unsigned int G1StringDedupTable::hash_code(typeArrayOop value, bool latin1) {
317  unsigned int hash;
318  int length = value->length();
319  if (latin1) {
320    const jbyte* data = (jbyte*)value->base(T_BYTE);
321    if (use_java_hash()) {
322      hash = java_lang_String::hash_code(data, length);
323    } else {
324      hash = AltHashing::murmur3_32(_table->_hash_seed, data, length);
325    }
326  } else {
327    length /= sizeof(jchar) / sizeof(jbyte); // Convert number of bytes to number of chars
328    const jchar* data = (jchar*)value->base(T_CHAR);
329    if (use_java_hash()) {
330      hash = java_lang_String::hash_code(data, length);
331    } else {
332      hash = AltHashing::murmur3_32(_table->_hash_seed, data, length);
333    }
334  }
335
336  return hash;
337}
338
// Attempts to deduplicate the value array of java_string against the table
// and records the outcome in stat:
//  - inspected: always incremented
//  - skipped:   the string has no value array
//  - hashed:    a hash code had to be computed
//  - known:     the table already holds this very array
//  - new:       all remaining cases, together with the size of the array; if
//               an equal array was found in the table the string is
//               redirected to it and the size is also counted as deduped
//               young or deduped old.
void G1StringDedupTable::deduplicate(oop java_string, G1StringDedupStat& stat) {
  assert(java_lang_String::is_instance(java_string), "Must be a string");
  NoSafepointVerifier nsv;

  stat.inc_inspected();

  typeArrayOop value = java_lang_String::value(java_string);
  if (value == NULL) {
    // String has no value
    stat.inc_skipped();
    return;
  }

  bool latin1 = java_lang_String::is_latin1(java_string);
  unsigned int hash = 0;

  if (use_java_hash()) {
    // Get hash code from cache
    hash = java_lang_String::hash(java_string);
  }

  // A hash of zero means that no usable hash code has been obtained so far
  if (hash == 0) {
    // Compute hash
    hash = hash_code(value, latin1);
    stat.inc_hashed();

    if (use_java_hash() && hash != 0) {
      // Store hash code in cache
      java_lang_String::set_hash(java_string, hash);
    }
  }

  // Returns NULL if value was not in the table and has been added now
  typeArrayOop existing_value = lookup_or_add(value, latin1, hash);
  if (existing_value == value) {
    // Same value, already known
    stat.inc_known();
    return;
  }

  // Get size of value array
  uintx size_in_bytes = value->size() * HeapWordSize;
  stat.inc_new(size_in_bytes);

  if (existing_value != NULL) {
    // Enqueue the reference to make sure it is kept alive. Concurrent mark might
    // otherwise declare it dead if there are no other strong references to this object.
    G1SATBCardTableModRefBS::enqueue(existing_value);

    // Existing value found, deduplicate string
    java_lang_String::set_value(java_string, existing_value);

    // Account the array the string no longer refers to as young or old
    if (G1CollectedHeap::heap()->is_in_young(value)) {
      stat.inc_deduped_young(size_in_bytes);
    } else {
      stat.inc_deduped_old(size_in_bytes);
    }
  }
}
397
398G1StringDedupTable* G1StringDedupTable::prepare_resize() {
399  size_t size = _table->_size;
400
401  // Check if the hashtable needs to be resized
402  if (_table->_entries > _table->_grow_threshold) {
403    // Grow table, double the size
404    size *= 2;
405    if (size > _max_size) {
406      // Too big, don't resize
407      return NULL;
408    }
409  } else if (_table->_entries < _table->_shrink_threshold) {
410    // Shrink table, half the size
411    size /= 2;
412    if (size < _min_size) {
413      // Too small, don't resize
414      return NULL;
415    }
416  } else if (StringDeduplicationResizeALot) {
417    // Force grow
418    size *= 2;
419    if (size > _max_size) {
420      // Too big, force shrink instead
421      size /= 4;
422    }
423  } else {
424    // Resize not needed
425    return NULL;
426  }
427
428  // Update statistics
429  _resize_count++;
430
431  // Update max cache size
432  _entry_cache->set_max_size(size * _max_cache_factor);
433
434  // Allocate the new table. The new table will be populated by workers
435  // calling unlink_or_oops_do() and finally installed by finish_resize().
436  return new G1StringDedupTable(size, _table->_hash_seed);
437}
438
439void G1StringDedupTable::finish_resize(G1StringDedupTable* resized_table) {
440  assert(resized_table != NULL, "Invalid table");
441
442  resized_table->_entries = _table->_entries;
443
444  // Free old table
445  delete _table;
446
447  // Install new table
448  _table = resized_table;
449}
450
// Entry point for a worker taking part in a parallel scan of the table. The
// worker repeatedly claims a partition through the closure and processes it
// together with its sibling partition until the first half of the table has
// been claimed completely. The number of entries removed by this worker is
// subtracted from the entry count of the table, and added to the removal
// statistics, once at the end.
void G1StringDedupTable::unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl, uint worker_id) {
  // The table is divided into partitions to allow lock-less parallel processing by
  // multiple worker threads. A worker thread first claims a partition, which ensures
  // exclusive access to that part of the table, then continues to process it. To allow
  // shrinking of the table in parallel we also need to make sure that the same worker
  // thread processes all partitions where entries will hash to the same destination
  // partition. Since the table size is always a power of two and we always shrink by
  // dividing the table in half, we know that for a given partition there is only one
  // other partition whose entries will hash to the same destination partition. That
  // other partition is always the sibling partition in the second half of the table.
  // For example, if the table is divided into 8 partitions, the sibling of partition 0
  // is partition 4, the sibling of partition 1 is partition 5, etc.
  size_t table_half = _table->_size / 2;

  // Let each partition be one page worth of buckets
  size_t partition_size = MIN2(table_half, os::vm_page_size() / sizeof(G1StringDedupEntry*));
  assert(table_half % partition_size == 0, "Invalid partition size");

  // Number of entries removed during the scan
  uintx removed = 0;

  for (;;) {
    // Grab next partition to scan
    size_t partition_begin = cl->claim_table_partition(partition_size);
    size_t partition_end = partition_begin + partition_size;
    if (partition_begin >= table_half) {
      // End of table
      break;
    }

    // Scan the partition followed by the sibling partition in the second half of the table
    removed += unlink_or_oops_do(cl, partition_begin, partition_end, worker_id);
    removed += unlink_or_oops_do(cl, table_half + partition_begin, table_half + partition_end, worker_id);
  }

  // Delayed update to avoid contention on the table lock
  if (removed > 0) {
    MutexLockerEx ml(StringDedupTable_lock, Mutex::_no_safepoint_check_flag);
    _table->_entries -= removed;
    _entries_removed += removed;
  }
}
493
// Processes the buckets [partition_begin, partition_end) of the current
// table. Entries whose array is alive according to the closure are kept
// alive and then either transferred to the resized table (when resizing),
// rehashed in place (when rehashing) or left untouched. Entries whose array
// is not alive are unlinked and handed to the entry cache on behalf of
// worker_id. Returns the number of entries that were removed.
uintx G1StringDedupTable::unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl,
                                            size_t partition_begin,
                                            size_t partition_end,
                                            uint worker_id) {
  uintx removed = 0;
  for (size_t bucket = partition_begin; bucket < partition_end; bucket++) {
    // entry always points at the link (bucket head or next field) that refers
    // to the entry under inspection. transfer() and remove() redirect that
    // link to the successor, so the cursor is only advanced explicitly when
    // the entry stays in place.
    G1StringDedupEntry** entry = _table->bucket(bucket);
    while (*entry != NULL) {
      oop* p = (oop*)(*entry)->obj_addr();
      if (cl->is_alive(*p)) {
        cl->keep_alive(p);
        if (cl->is_resizing()) {
          // We are resizing the table, transfer entry to the new table
          _table->transfer(entry, cl->resized_table());
        } else {
          if (cl->is_rehashing()) {
            // We are rehashing the table, rehash the entry but keep it
            // in the table. We can't transfer entries into the new table
            // at this point since we don't have exclusive access to all
            // destination partitions. finish_rehash() will do a single
            // threaded transfer of all entries.
            typeArrayOop value = (typeArrayOop)*p;
            bool latin1 = (*entry)->latin1();
            unsigned int hash = hash_code(value, latin1);
            (*entry)->set_hash(hash);
          }

          // Move to next entry
          entry = (*entry)->next_addr();
        }
      } else {
        // Not alive, remove entry from table
        _table->remove(entry, worker_id);
        removed++;
      }
    }
  }

  return removed;
}
534
535G1StringDedupTable* G1StringDedupTable::prepare_rehash() {
536  if (!_table->_rehash_needed && !StringDeduplicationRehashALot) {
537    // Rehash not needed
538    return NULL;
539  }
540
541  // Update statistics
542  _rehash_count++;
543
544  // Compute new hash seed
545  _table->_hash_seed = AltHashing::compute_seed();
546
547  // Allocate the new table, same size and hash seed
548  return new G1StringDedupTable(_table->_size, _table->_hash_seed);
549}
550
551void G1StringDedupTable::finish_rehash(G1StringDedupTable* rehashed_table) {
552  assert(rehashed_table != NULL, "Invalid table");
553
554  // Move all newly rehashed entries into the correct buckets in the new table
555  for (size_t bucket = 0; bucket < _table->_size; bucket++) {
556    G1StringDedupEntry** entry = _table->bucket(bucket);
557    while (*entry != NULL) {
558      _table->transfer(entry, rehashed_table);
559    }
560  }
561
562  rehashed_table->_entries = _table->_entries;
563
564  // Free old table
565  delete _table;
566
567  // Install new table
568  _table = rehashed_table;
569}
570
571void G1StringDedupTable::verify() {
572  for (size_t bucket = 0; bucket < _table->_size; bucket++) {
573    // Verify entries
574    G1StringDedupEntry** entry = _table->bucket(bucket);
575    while (*entry != NULL) {
576      typeArrayOop value = (*entry)->obj();
577      guarantee(value != NULL, "Object must not be NULL");
578      guarantee(G1CollectedHeap::heap()->is_in_reserved(value), "Object must be on the heap");
579      guarantee(!value->is_forwarded(), "Object must not be forwarded");
580      guarantee(value->is_typeArray(), "Object must be a typeArrayOop");
581      bool latin1 = (*entry)->latin1();
582      unsigned int hash = hash_code(value, latin1);
583      guarantee((*entry)->hash() == hash, "Table entry has inorrect hash");
584      guarantee(_table->hash_to_index(hash) == bucket, "Table entry has incorrect index");
585      entry = (*entry)->next_addr();
586    }
587
588    // Verify that we do not have entries with identical oops or identical arrays.
589    // We only need to compare entries in the same bucket. If the same oop or an
590    // identical array has been inserted more than once into different/incorrect
591    // buckets the verification step above will catch that.
592    G1StringDedupEntry** entry1 = _table->bucket(bucket);
593    while (*entry1 != NULL) {
594      typeArrayOop value1 = (*entry1)->obj();
595      bool latin1_1 = (*entry1)->latin1();
596      G1StringDedupEntry** entry2 = (*entry1)->next_addr();
597      while (*entry2 != NULL) {
598        typeArrayOop value2 = (*entry2)->obj();
599        bool latin1_2 = (*entry2)->latin1();
600        guarantee(latin1_1 != latin1_2 || !equals(value1, value2), "Table entries must not have identical arrays");
601        entry2 = (*entry2)->next_addr();
602      }
603      entry1 = (*entry1)->next_addr();
604    }
605  }
606}
607
// Deletes the entries that have accumulated on the overflow lists of the
// entry cache.
void G1StringDedupTable::clean_entry_cache() {
  _entry_cache->delete_overflowed();
}
611
// Logs the table statistics at debug level using the gc and stringdedup tags.
// The reported memory usage covers the bucket array plus all entries that
// are in the table or in the entry cache.
void G1StringDedupTable::print_statistics() {
  Log(gc, stringdedup) log;
  log.debug("  Table");
  log.debug("    Memory Usage: " G1_STRDEDUP_BYTES_FORMAT_NS,
            G1_STRDEDUP_BYTES_PARAM(_table->_size * sizeof(G1StringDedupEntry*) + (_table->_entries + _entry_cache->size()) * sizeof(G1StringDedupEntry)));
  log.debug("    Size: " SIZE_FORMAT ", Min: " SIZE_FORMAT ", Max: " SIZE_FORMAT, _table->_size, _min_size, _max_size);
  // Load is the number of entries relative to the number of buckets, in percent
  log.debug("    Entries: " UINTX_FORMAT ", Load: " G1_STRDEDUP_PERCENT_FORMAT_NS ", Cached: " UINTX_FORMAT ", Added: " UINTX_FORMAT ", Removed: " UINTX_FORMAT,
            _table->_entries, (double)_table->_entries / (double)_table->_size * 100.0, _entry_cache->size(), _entries_added, _entries_removed);
  log.debug("    Resize Count: " UINTX_FORMAT ", Shrink Threshold: " UINTX_FORMAT "(" G1_STRDEDUP_PERCENT_FORMAT_NS "), Grow Threshold: " UINTX_FORMAT "(" G1_STRDEDUP_PERCENT_FORMAT_NS ")",
            _resize_count, _table->_shrink_threshold, _shrink_load_factor * 100.0, _table->_grow_threshold, _grow_load_factor * 100.0);
  log.debug("    Rehash Count: " UINTX_FORMAT ", Rehash Threshold: " UINTX_FORMAT ", Hash Seed: 0x%x", _rehash_count, _rehash_threshold, _table->_hash_seed);
  log.debug("    Age Threshold: " UINTX_FORMAT, StringDeduplicationAgeThreshold);
}
625