/*
 * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/concurrentMarkThread.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1ConcurrentMark.inline.hpp"
#include "gc/g1/g1HeapVerifier.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1CardLiveData.inline.hpp"
#include "gc/g1/g1Policy.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.inline.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
#include "logging/log.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"
#include "utilities/align.hpp"
#include "utilities/growableArray.hpp"

bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  assert(addr < _cm->finger(), "invariant");
  assert(addr >= _task->finger(), "invariant");

  // We move that task's local finger along.
  _task->move_finger_to(addr);

  _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr)));
  // we only partially drain the local queue and global stack
  _task->drain_local_queue(true);
  _task->drain_global_stack(true);

  // if the has_aborted flag has been raised, we need to bail out of
  // the iteration
  return !_task->has_aborted();
}

G1CMMarkStack::G1CMMarkStack() :
  _max_chunk_capacity(0),
  _base(NULL),
  _chunk_capacity(0) {
  set_empty();
}

bool G1CMMarkStack::resize(size_t new_capacity) {
  assert(is_empty(), "Only resize when stack is empty.");
  assert(new_capacity <= _max_chunk_capacity,
         "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);

  TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(new_capacity, mtGC);

  if (new_base == NULL) {
    log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
    return false;
  }
  // Release old mapping.
  if (_base != NULL) {
    MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
  }

  _base = new_base;
  _chunk_capacity = new_capacity;
  set_empty();

  return true;
}

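// Note (explanatory, not from the original sources): the alignment below, expressed
// in number of G1TaskQueueEntry elements, is chosen so that a capacity aligned to it
// corresponds, roughly speaking, to a whole number of TaskQueueEntryChunks that also
// occupy a whole number of VM allocation granules, which is what the mmap-based
// backing store used by resize() expects.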
size_t G1CMMarkStack::capacity_alignment() {
  return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
}

bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
  guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");

  size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);

  _max_chunk_capacity = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
  size_t initial_chunk_capacity = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;

  guarantee(initial_chunk_capacity <= _max_chunk_capacity,
            "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
            _max_chunk_capacity,
            initial_chunk_capacity);

  log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
                initial_chunk_capacity, _max_chunk_capacity);

  return resize(initial_chunk_capacity);
}

void G1CMMarkStack::expand() {
  if (_chunk_capacity == _max_chunk_capacity) {
    log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
    return;
  }
  size_t old_capacity = _chunk_capacity;
  // Double capacity if possible
  size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);

  if (resize(new_capacity)) {
    log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
                  old_capacity, new_capacity);
  } else {
    log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
                    old_capacity, new_capacity);
  }
}

G1CMMarkStack::~G1CMMarkStack() {
  if (_base != NULL) {
    MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
  }
}

void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) {
  elem->next = *list;
  *list = elem;
}

void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
  MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_chunk_list, elem);
  _chunks_in_chunk_list++;
}

void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
  MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_free_list, elem);
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) {
  TaskQueueEntryChunk* result = *list;
  if (result != NULL) {
    *list = (*list)->next;
  }
  return result;
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
  MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
  if (result != NULL) {
    _chunks_in_chunk_list--;
  }
  return result;
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
  MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  return remove_chunk_from_list(&_free_list);
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() {
  // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
  // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding
  // wraparound of _hwm.
  if (_hwm >= _chunk_capacity) {
    return NULL;
  }

  size_t cur_idx = Atomic::add(1u, &_hwm) - 1;
  if (cur_idx >= _chunk_capacity) {
    return NULL;
  }

  TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk;
  result->next = NULL;
  return result;
}

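// Note (explanatory, not from the original sources): pushing copies a caller-provided
// buffer of EntriesPerChunk entries into a chunk taken from the free list (or freshly
// carved out of the mmapped backing array by allocate_new_chunk()) and links it onto
// the chunk list; popping does the reverse. Each list is guarded by its own lock, so
// marking tasks can spill and refill concurrently with each other.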
bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
  // Get a new chunk.
  TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();

  if (new_chunk == NULL) {
    // Did not get a chunk from the free list. Allocate from backing memory.
    new_chunk = allocate_new_chunk();

    if (new_chunk == NULL) {
      return false;
    }
  }

  Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_chunk_list(new_chunk);

  return true;
}

bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
  TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();

  if (cur == NULL) {
    return false;
  }

  Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_free_list(cur);
  return true;
}

void G1CMMarkStack::set_empty() {
  _chunks_in_chunk_list = 0;
  _hwm = 0;
  _chunk_list = NULL;
  _free_list = NULL;
}

G1CMRootRegions::G1CMRootRegions() :
  _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _claimed_survivor_index(0) { }

void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
  _survivors = survivors;
  _cm = cm;
}

void G1CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  _claimed_survivor_index = 0;
  _scan_in_progress = _survivors->regions()->is_nonempty();
  _should_abort = false;
}

HeapRegion* G1CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions();

  int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1;
  if (claimed_index < survivor_regions->length()) {
    return survivor_regions->at(claimed_index);
  }
  return NULL;
}

uint G1CMRootRegions::num_root_regions() const {
  return (uint)_survivors->regions()->length();
}

void G1CMRootRegions::notify_scan_done() {
  MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
  _scan_in_progress = false;
  RootRegionScan_lock->notify_all();
}

void G1CMRootRegions::cancel_scan() {
  notify_scan_done();
}

void G1CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index);
    assert((uint)_claimed_survivor_index >= _survivors->length(),
           "we should have claimed all survivors, claimed index = %u, length = %u",
           (uint)_claimed_survivor_index, _survivors->length());
  }

  notify_scan_done();
}

bool G1CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

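// Roughly one concurrent marking thread for every four parallel GC threads, with a
// minimum of one: e.g. ParallelGCThreads values of 1-5 yield 1, 6-9 yield 2,
// 10-13 yield 3, and so on.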
uint G1ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_list("Cleanup List"),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _global_mark_stack(),
  // _finger set in set_non_marking_state

  _max_worker_id(ParallelGCThreads),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new G1CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()),
  _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _completed_initialization(false) {

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
      vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h->survivor(), this);

  if (ConcGCThreads > ParallelGCThreads) {
    log_warning(gc)("Can't have more ConcGCThreads (%u) than ParallelGCThreads (%u).",
                    ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
    // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
    // if both are set
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else if (G1MarkingOverheadPercent > 0) {
    // We will calculate the number of parallel marking threads based
    // on a target overhead with respect to the soft real-time goal
    double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
    double overall_cm_overhead =
      (double) MaxGCPauseMillis * marking_overhead /
      (double) GCPauseIntervalMillis;
    double cpu_ratio = 1.0 / os::initial_active_processor_count();
    double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
    double marking_task_overhead =
      overall_cm_overhead / marking_thread_num * os::initial_active_processor_count();
    double sleep_factor =
                       (1.0 - marking_task_overhead) / marking_task_overhead;
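    // Worked example (values are only illustrative): with G1MarkingOverheadPercent=10,
    // MaxGCPauseMillis=200, GCPauseIntervalMillis=1000 and 8 active processors,
    // overall_cm_overhead = 200 * 0.10 / 1000 = 0.02 and cpu_ratio = 0.125, so
    // marking_thread_num = ceil(0.02 / 0.125) = 1, marking_task_overhead =
    // 0.02 / 1 * 8 = 0.16, and sleep_factor = (1 - 0.16) / 0.16 = 5.25, i.e. the
    // single marking thread sleeps about 5.25x as long as it just worked.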

    FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
    _sleep_factor             = sleep_factor;
    _marking_task_overhead    = marking_task_overhead;
  } else {
    // Calculate the number of parallel marking threads by scaling
    // the number of parallel GC threads.
    uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  }

  assert(ConcGCThreads > 0, "Should have been set");
  log_debug(gc)("ConcGCThreads: %u", ConcGCThreads);
  log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
  _parallel_marking_threads = ConcGCThreads;
  _max_parallel_marking_threads = _parallel_marking_threads;

  _parallel_workers = new WorkGang("G1 Marker",
       _max_parallel_marking_threads, false, true);
  if (_parallel_workers == NULL) {
    vm_exit_during_initialization("Failed necessary allocation.");
  } else {
    _parallel_workers->initialize_workers();
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
          MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
                      "must be between 1 and " SIZE_FORMAT,
                      mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                          "must be between 1 and " SIZE_FORMAT,
                          MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                          " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                          MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) {
    vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
  }

  _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  for (uint i = 0; i < _max_worker_id; ++i) {
    G1CMTaskQueue* task_queue = new G1CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new G1CMTask(i, this, task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // so that the call below can read a sensible value
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void G1ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end   = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}


void G1ConcurrentMark::reset_marking_state() {
  _global_mark_stack.set_empty();

  // Expand the marking stack, if we have to and if we can.
  if (has_overflown()) {
    _global_mark_stack.expand();
  }

  clear_has_overflown();
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    G1CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void G1ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(_finger), p2i(_heap_end));
  }
}

void G1ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

G1ConcurrentMark::~G1ConcurrentMark() {
  // The G1ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

class G1ClearBitMapTask : public AbstractGangTask {
public:
  static size_t chunk_size() { return M; }
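  // The clearing work is divided into chunk_size() pieces so that, in the suspendible
  // case, workers can yield to a safepoint between pieces; see
  // G1ClearBitmapHRClosure::doHeapRegion() below.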

private:
  // Heap region closure used for clearing the given mark bitmap.
  class G1ClearBitmapHRClosure : public HeapRegionClosure {
  private:
    G1CMBitMap* _bitmap;
    G1ConcurrentMark* _cm;
  public:
    G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap) {
    }

    virtual bool doHeapRegion(HeapRegion* r) {
      size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;

      HeapWord* cur = r->bottom();
      HeapWord* const end = r->end();

      while (cur < end) {
        MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
        _bitmap->clear_range(mr);

        cur += chunk_size_in_words;

        // Abort iteration if after yielding the marking has been aborted.
        if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) {
          return true;
        }
        // Repeat the asserts from before the start of the closure. We will do them
        // as asserts here to minimize their overhead on the product. However, we
        // will have them as guarantees at the beginning / end of the bitmap
        // clearing to get some checking in the product.
        assert(_cm == NULL || _cm->cmThread()->during_cycle(), "invariant");
        assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
      }
      assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());

      return false;
    }
  };

  G1ClearBitmapHRClosure _cl;
  HeapRegionClaimer _hr_claimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

public:
  G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
    AbstractGangTask("G1 Clear Bitmap"),
    _cl(bitmap, suspendible ? cm : NULL),
    _hr_claimer(n_workers),
    _suspendible(suspendible)
  { }

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate(&_cl, worker_id, &_hr_claimer);
  }

  bool is_complete() {
    return _cl.complete();
  }
};

void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) {
  assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");

  size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor();
  size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();

  uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());

  G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield);

  log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks);
  workers->run_task(&cl, num_workers);
  guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
}

void G1ConcurrentMark::cleanup_for_next_mark() {
  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");

  clear_bitmap(_nextMarkBitMap, _parallel_workers, true);

  // Clear the live count data. If the marking has been aborted, the abort()
  // call already did that.
  if (!has_aborted()) {
    clear_live_data(_parallel_workers);
    DEBUG_ONLY(verify_live_data_clear());
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");
}

void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
  assert(SafepointSynchronize::is_at_safepoint(), "Should only clear the entire prev bitmap at a safepoint.");
  clear_bitmap(_prevMarkBitMap, workers, false);
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  G1CMBitMap* _bitmap;
  bool _error;
 public:
  CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently to the mutator, so we must make sure
    // that the result of the get_next_marked_addr() call is compared to the
    // value passed to it as limit to detect any found bits.
    // end never changes in G1.
    HeapWord* end = r->end();
    return _bitmap->get_next_marked_addr(r->bottom(), end) != end;
  }
};

bool G1ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    r->note_start_of_marking();
    return false;
  }
};

void G1ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _has_aborted = false;

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void G1ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for a Full GC or an evacuation pause to occur while it
 * waits at the barrier. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state();

      log_info(gc, marking)("Concurrent Mark reset for overflow");
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}

void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}

class G1CMConcurrentMarkingTask: public AbstractGangTask {
private:
  G1ConcurrentMark*     _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");
      G1CMTask* the_task = _cm->task(worker_id);
      the_task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          double start_vtime_sec = os::elapsedVTime();
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

          the_task->do_marking_step(mark_step_duration_ms,
                                    true  /* do_termination */,
                                    false /* is_serial*/);

          double end_vtime_sec = os::elapsedVTime();
          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
          _cm->do_yield_check();

          jlong sleep_time_ms;
          if (!_cm->has_aborted() && the_task->has_aborted()) {
            sleep_time_ms =
              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
            {
              SuspendibleThreadSetLeaver sts_leave;
              os::sleep(Thread::current(), sleep_time_ms, false);
            }
          }
        } while (!_cm->has_aborted() && the_task->has_aborted());
      }
      the_task->record_end_time();
      guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  G1CMConcurrentMarkingTask(G1ConcurrentMark* cm,
                            ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~G1CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint G1ConcurrentMark::calc_parallel_marking_threads() {
  uint n_conc_workers = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    n_conc_workers = max_parallel_marking_threads();
  } else {
    n_conc_workers =
      AdaptiveSizePolicy::calc_default_active_workers(max_parallel_marking_threads(),
                                                      1, /* Minimum workers */
                                                      parallel_marking_threads(),
                                                      Threads::number_of_non_daemon_threads());
    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
    // that scaling has already gone into "_max_parallel_marking_threads".
  }
  assert(n_conc_workers > 0 && n_conc_workers <= max_parallel_marking_threads(),
         "Calculated number of workers must be larger than zero and at most the maximum %u, but is %u",
         max_parallel_marking_threads(), n_conc_workers);
  return n_conc_workers;
}

void G1ConcurrentMark::scanRootRegion(HeapRegion* hr) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class G1CMRootRegionScanTask : public AbstractGangTask {
private:
  G1ConcurrentMark* _cm;

public:
  G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
    AbstractGangTask("G1 Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    G1CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr);
      hr = root_regions->claim_next();
    }
  }
};

void G1ConcurrentMark::scan_root_regions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");

    _parallel_marking_threads = MIN2(calc_parallel_marking_threads(),
                                     // We distribute work on a per-region basis, so starting
                                     // more threads than that is useless.
                                     root_regions()->num_root_regions());
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");

    G1CMRootRegionScanTask task(this);
    log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
                        task.name(), _parallel_marking_threads, root_regions()->num_root_regions());
    _parallel_workers->run_task(&task, _parallel_marking_threads);

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void G1ConcurrentMark::concurrent_cycle_start() {
  _gc_timer_cm->register_gc_start();

  _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());

  _g1h->trace_heap_before_gc(_gc_tracer_cm);
}

void G1ConcurrentMark::concurrent_cycle_end() {
  _g1h->trace_heap_after_gc(_gc_tracer_cm);

  if (has_aborted()) {
    _gc_tracer_cm->report_concurrent_mode_failure();
  }

  _gc_timer_cm->register_gc_end();

  _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
}

void G1ConcurrentMark::mark_from_roots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
    "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());
  assert(active_workers > 0, "Should have been set");

  // Setting active workers is not guaranteed since fewer
  // worker threads may currently exist and more may not be
  // available.
  active_workers = _parallel_workers->update_active_workers(active_workers);
  log_info(gc, task)("Using %u workers of %u for marking", active_workers, _parallel_workers->total_workers());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  G1CMConcurrentMarkingTask markingTask(this, cmThread());
  _parallel_workers->run_task(&markingTask);
  print_stats();
}

void G1ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
  }
  g1h->verifier()->check_bitmaps("Remark Start");

  G1Policy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // We overflowed.  Restart concurrent marking.
    _restart_for_overflow = true;

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
    }
    g1h->verifier()->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  _gc_tracer_cm->report_object_count_after_gc(&is_alive);
}

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  uint _old_regions_removed;
  uint _humongous_regions_removed;
  HRRSCleanupTask* _hrrs_cleanup_task;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             FreeRegionList* local_cleanup_list,
                             HRRSCleanupTask* hrrs_cleanup_task) :
    _g1(g1),
    _freed_bytes(0),
    _local_cleanup_list(local_cleanup_list),
    _old_regions_removed(0),
    _humongous_regions_removed(0),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

  size_t freed_bytes() { return _freed_bytes; }
  const uint old_regions_removed() { return _old_regions_removed; }
  const uint humongous_regions_removed() { return _humongous_regions_removed; }

  bool doHeapRegion(HeapRegion *hr) {
    _g1->reset_gc_time_stamps(hr);
    hr->note_end_of_marking();

    if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young() && !hr->is_archive()) {
      _freed_bytes += hr->used();
      hr->set_containing_set(NULL);
      if (hr->is_humongous()) {
        _humongous_regions_removed++;
        _g1->free_humongous_region(hr, _local_cleanup_list, true /* skip_remset */);
      } else {
        _old_regions_removed++;
        _g1->free_region(hr, _local_cleanup_list, true /* skip_remset */);
      }
    } else {
      hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
    }

    return false;
  }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;

protected:
  G1CollectedHeap* _g1h;
  FreeRegionList* _cleanup_list;
  HeapRegionClaimer _hrclaimer;

public:
  G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
      AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
  }

  void work(uint worker_id) {
    FreeRegionList local_cleanup_list("Local Cleanup List");
    HRRSCleanupTask hrrs_cleanup_task;
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
                                           &hrrs_cleanup_task);
    _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now update the lists
    _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());

      // If we iterate over the global cleanup list at the end of
      // cleanup to do this printing we will not guarantee to only
      // generate output for the newly-reclaimed regions (the list
      // might not be empty at the beginning of cleanup; we might
      // still be working on its previous contents). So we do the
      // printing here, before we append the new regions to the global
      // cleanup list.

      G1HRPrinter* hr_printer = _g1h->hr_printer();
      if (hr_printer->is_active()) {
        FreeRegionListIterator iter(&local_cleanup_list);
        while (iter.more_available()) {
          HeapRegion* hr = iter.get_next();
          hr_printer->cleanup(hr);
        }
      }

      _cleanup_list->add_ordered(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
  }
};

void G1ConcurrentMark::cleanup() {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  g1h->verifier()->verify_region_sets_optional();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
  }
  g1h->verifier()->check_bitmaps("Cleanup Start");

  G1Policy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();

  HeapRegionRemSet::reset_for_cleanup_tasks();

  {
    GCTraceTime(Debug, gc)("Finalize Live Data");
    finalize_live_data();
  }

  if (VerifyDuringGC) {
    GCTraceTime(Debug, gc)("Verify Live Data");
    verify_live_data();
  }

  g1h->collector_state()->set_mark_in_progress(false);

  double count_end = os::elapsedTime();
  double this_final_counting_time = (count_end - start);
  _total_counting_time += this_final_counting_time;

  if (log_is_enabled(Trace, gc, liveness)) {
    G1PrintRegionLivenessInfoClosure cl("Post-Marking");
    _g1h->heap_region_iterate(&cl);
  }

  // Install newly created mark bitMap as "prev".
  swapMarkBitMaps();

  g1h->reset_gc_time_stamp();

  uint n_workers = _g1h->workers()->active_workers();

  // Note end of marking in all heap regions.
  G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
  g1h->workers()->run_task(&g1_par_note_end_task);
  g1h->check_gc_time_stamps();

  if (!cleanup_list_is_empty()) {
    // The cleanup list is not empty, so we'll have to process it
    // concurrently. Notify anyone else that might be wanting free
    // regions that there will be more free regions coming soon.
    g1h->set_free_regions_coming();
  }

  // The remembered set scrubbing below has to happen before the
  // record_concurrent_mark_cleanup_end() call further down, since it affects
  // the metric by which we sort the heap regions.
  if (G1ScrubRemSets) {
    double rs_scrub_start = os::elapsedTime();
    g1h->scrub_rem_set();
    _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start);
  }

  // this will also free any regions totally full of garbage objects,
  // and sort the regions.
  g1h->g1_policy()->record_concurrent_mark_cleanup_end();

  // Statistics.
  double end = os::elapsedTime();
  _cleanup_times.add((end - start) * 1000.0);

  // Clean up will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::update_heap_info_at_gc();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
  }

  g1h->verifier()->check_bitmaps("Cleanup End");

  g1h->verifier()->verify_region_sets_optional();

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for complete_cleanup() to finish.
  g1h->increment_total_collections();

  // Clean out dead classes and update Metaspace sizes.
  if (ClassUnloadingWithConcurrentMark) {
    ClassLoaderDataGraph::purge();
  }
  MetaspaceGC::compute_new_size();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  g1h->g1mm()->update_sizes();
  g1h->allocation_context_stats().update_after_mark();
}

void G1ConcurrentMark::complete_cleanup() {
  if (has_aborted()) return;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _cleanup_list.verify_optional();
  FreeRegionList tmp_free_list("Tmp Free List");

  log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
                                  "cleanup list has %u entries",
                                  _cleanup_list.length());

  // No one else should be accessing the _cleanup_list at this point,
  // so it is not necessary to take any locks
  while (!_cleanup_list.is_empty()) {
    HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
    assert(hr != NULL, "Got NULL from a non-empty list");
    hr->par_clear();
    tmp_free_list.add_ordered(hr);

    // Instead of adding one region at a time to the secondary_free_list,
    // we accumulate them in the local list and move them a few at a
    // time. This also cuts down on the number of notify_all() calls
    // we do during this process. We'll also append the local list when
    // _cleanup_list is empty (which means we just removed the last
    // region from the _cleanup_list).
    if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
        _cleanup_list.is_empty()) {
      log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
                                      "appending %u entries to the secondary_free_list, "
                                      "cleanup list still has %u entries",
                                      tmp_free_list.length(),
                                      _cleanup_list.length());

      {
        MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
        g1h->secondary_free_list_add(&tmp_free_list);
        SecondaryFreeList_lock->notify_all();
      }
#ifndef PRODUCT
      if (G1StressConcRegionFreeing) {
        for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
          os::sleep(Thread::current(), (jlong) 1, false);
        }
      }
#endif
    }
  }
  assert(tmp_free_list.is_empty(), "post-condition");
}

// Supporting Object and Oop closures for reference discovery
// and processing during marking

bool G1CMIsAliveClosure::do_object_b(oop obj) {
  HeapWord* addr = (HeapWord*)obj;
  return addr != NULL &&
         (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
}

// 'Keep Alive' oop closure used by both serial and parallel reference processing.
// Uses the G1CMTask associated with a worker thread (for serial reference
// processing the G1CMTask for worker 0 is used) to preserve (mark) and
// trace referent objects.
//
// Using the G1CMTask and embedded local queues avoids having the worker
// threads operating on the global mark stack. This reduces the risk
// of overflowing the stack - which we would rather avoid at this late
// state. Also using the tasks' local queues removes the potential
// of the workers interfering with each other that could occur if
// operating on the global stack.

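// In effect, after every G1RefProcDrainInterval referents handed to it, the closure
// below pauses reference iteration and calls G1CMTask::do_marking_step() to drain
// whatever has accumulated, which bounds how much marking work can pile up on the
// task's local queue during reference processing.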
class G1CMKeepAliveAndDrainClosure: public OopClosure {
  G1ConcurrentMark* _cm;
  G1CMTask*         _task;
  int               _ref_counter_limit;
  int               _ref_counter;
  bool              _is_serial;
 public:
  G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial),
    _ref_counter_limit(G1RefProcDrainInterval) {
    assert(_ref_counter_limit > 0, "sanity");
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      oop obj = oopDesc::load_decode_heap_oop(p);
      _task->deal_with_reference(obj);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them
        // and objects reachable from them on to the local stack (and
        // possibly the global stack). Call G1CMTask::do_marking_step() to
        // process these entries.
        //
        // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
        // there's nothing more to do (i.e. we're done with the entries that
        // were pushed as a result of the G1CMTask::deal_with_reference() calls
        // above) or we overflow.
        //
        // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
        // flag while there may still be some work to do. (See the comment at
        // the beginning of G1CMTask::do_marking_step() for those conditions -
        // one of which is reaching the specified time target.) It is only
        // when G1CMTask::do_marking_step() returns without setting the
        // has_aborted() flag that the marking step has completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false      /* do_termination */,
                                 _is_serial);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    }
  }
};

// 'Drain' oop closure used by both serial and parallel reference processing.
// Uses the G1CMTask associated with a given worker thread (for serial
// reference processing the G1CMtask for worker 0 is used). Calls the
// do_marking_step routine, with an unbelievably large timeout value,
// to drain the marking data structures of the remaining entries
// added by the 'keep alive' oop closure above.

class G1CMDrainMarkingStackClosure: public VoidClosure {
  G1ConcurrentMark* _cm;
  G1CMTask*         _task;
  bool              _is_serial;
 public:
  G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  void do_void() {
    do {
      // We call G1CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // G1CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
1474      // entries that were pushed as a result of applying the 'keep alive'
1475      // closure to the entries on the discovered ref lists) or we overflow
1476      // the global marking stack.
1477      //
1478      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1479      // flag while there may still be some work to do. (See the comment at
1480      // the beginning of G1CMTask::do_marking_step() for those conditions -
1481      // one of which is reaching the specified time target.) It is only
1482      // when G1CMTask::do_marking_step() returns without setting the
1483      // has_aborted() flag that the marking step has completed.
1484
1485      _task->do_marking_step(1000000000.0 /* something very large */,
1486                             true         /* do_termination */,
1487                             _is_serial);
1488    } while (_task->has_aborted() && !_cm->has_overflown());
1489  }
1490};
1491
1492// Implementation of AbstractRefProcTaskExecutor for parallel
1493// reference processing at the end of G1 concurrent marking
1494
1495class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
1496private:
1497  G1CollectedHeap*  _g1h;
1498  G1ConcurrentMark* _cm;
1499  WorkGang*         _workers;
1500  uint              _active_workers;
1501
1502public:
1503  G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1504                          G1ConcurrentMark* cm,
1505                          WorkGang* workers,
1506                          uint n_workers) :
1507    _g1h(g1h), _cm(cm),
1508    _workers(workers), _active_workers(n_workers) { }
1509
1510  // Executes the given task using concurrent marking worker threads.
1511  virtual void execute(ProcessTask& task);
1512  virtual void execute(EnqueueTask& task);
1513};
1514
1515class G1CMRefProcTaskProxy: public AbstractGangTask {
1516  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
1517  ProcessTask&      _proc_task;
1518  G1CollectedHeap*  _g1h;
1519  G1ConcurrentMark* _cm;
1520
1521public:
1522  G1CMRefProcTaskProxy(ProcessTask& proc_task,
1523                       G1CollectedHeap* g1h,
1524                       G1ConcurrentMark* cm) :
1525    AbstractGangTask("Process reference objects in parallel"),
1526    _proc_task(proc_task), _g1h(g1h), _cm(cm) {
1527    ReferenceProcessor* rp = _g1h->ref_processor_cm();
1528    assert(rp->processing_is_mt(), "shouldn't be here otherwise");
1529  }
1530
1531  virtual void work(uint worker_id) {
1532    ResourceMark rm;
1533    HandleMark hm;
1534    G1CMTask* task = _cm->task(worker_id);
1535    G1CMIsAliveClosure g1_is_alive(_g1h);
1536    G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
1537    G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
1538
1539    _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
1540  }
1541};
1542
1543void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
1544  assert(_workers != NULL, "Need parallel worker threads.");
1545  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1546
1547  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
1548
1549  // We need to reset the concurrency level before each
1550  // proxy task execution, so that the termination protocol
1551  // and overflow handling in G1CMTask::do_marking_step() knows
1552  // how many workers to wait for.
1553  _cm->set_concurrency(_active_workers);
1554  _workers->run_task(&proc_task_proxy);
1555}
1556
1557class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
1558  typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
1559  EnqueueTask& _enq_task;
1560
1561public:
1562  G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
1563    AbstractGangTask("Enqueue reference objects in parallel"),
1564    _enq_task(enq_task) { }
1565
1566  virtual void work(uint worker_id) {
1567    _enq_task.work(worker_id);
1568  }
1569};
1570
1571void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
1572  assert(_workers != NULL, "Need parallel worker threads.");
1573  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1574
1575  G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
1576
1577  // Not strictly necessary but...
1578  //
1579  // We need to reset the concurrency level before each
1580  // proxy task execution, so that the termination protocol
1581  // and overflow handling in G1CMTask::do_marking_step() knows
1582  // how many workers to wait for.
1583  _cm->set_concurrency(_active_workers);
1584  _workers->run_task(&enq_task_proxy);
1585}
1586
1587void G1ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1588  if (has_overflown()) {
1589    // Skip processing the discovered references if we have
1590    // overflown the global marking stack. Reference objects
1591    // only get discovered once so it is OK to not
1592    // de-populate the discovered reference lists. We could have done so,
1593    // but the only benefit would be that, when marking restarts,
1594    // fewer reference objects are discovered.
1595    return;
1596  }
1597
1598  ResourceMark rm;
1599  HandleMark   hm;
1600
1601  G1CollectedHeap* g1h = G1CollectedHeap::heap();
1602
1603  // Is alive closure.
1604  G1CMIsAliveClosure g1_is_alive(g1h);
1605
1606  // Inner scope to exclude the cleaning of the string and symbol
1607  // tables from the displayed time.
1608  {
1609    GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm);
1610
1611    ReferenceProcessor* rp = g1h->ref_processor_cm();
1612
1613    // See the comment in G1CollectedHeap::ref_processing_init()
1614    // about how reference processing currently works in G1.
1615
1616    // Set the soft reference policy
1617    rp->setup_policy(clear_all_soft_refs);
1618    assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1619
1620    // Instances of the 'Keep Alive' and 'Complete GC' closures used
1621    // in serial reference processing. Note these closures are also
1622    // used for serially processing (by the current thread) the
1623    // JNI references during parallel reference processing.
1624    //
1625    // These closures do not need to synchronize with the worker
1626    // threads involved in parallel reference processing as these
1627    // instances are executed serially by the current thread (i.e.
1628    // when reference processing is not multi-threaded it is
1629    // performed by the current thread instead of a gang worker).
1630    //
1631    // The gang tasks involved in parallel reference processing create
1632    // their own instances of these closures, which do their own
1633    // synchronization among themselves.
1634    G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
1635    G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
1636
1637    // We need at least one active thread. If reference processing
1638    // is not multi-threaded we use the current (VMThread) thread,
1639    // otherwise we use the work gang from the G1CollectedHeap and
1640    // we utilize all the worker threads we can.
1641    bool processing_is_mt = rp->processing_is_mt();
1642    uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
1643    active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
1644
1645    // Parallel processing task executor.
1646    G1CMRefProcTaskExecutor par_task_executor(g1h, this,
1647                                              g1h->workers(), active_workers);
1648    AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
1649
1650    // Set the concurrency level. The phase was already set prior to
1651    // executing the remark task.
1652    set_concurrency(active_workers);
1653
1654    // Set the degree of MT processing here.  If the discovery was done MT,
1655    // the number of threads involved during discovery could differ from
1656    // the number of active workers.  This is OK as long as the discovered
1657    // Reference lists are balanced (see balance_all_queues() and balance_queues()).
1658    rp->set_active_mt_degree(active_workers);
1659
1660    ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->num_q());
1661
1662    // Process the weak references.
1663    const ReferenceProcessorStats& stats =
1664        rp->process_discovered_references(&g1_is_alive,
1665                                          &g1_keep_alive,
1666                                          &g1_drain_mark_stack,
1667                                          executor,
1668                                          &pt);
1669    _gc_tracer_cm->report_gc_reference_stats(stats);
1670    pt.print_all_references();
1671
1672    // The do_oop work routines of the keep_alive and drain_marking_stack
1673    // oop closures will set the has_overflown flag if we overflow the
1674    // global marking stack.
1675
1676    assert(has_overflown() || _global_mark_stack.is_empty(),
1677            "Mark stack should be empty (unless it has overflown)");
1678
1679    assert(rp->num_q() == active_workers, "why not");
1680
1681    rp->enqueue_discovered_references(executor, &pt);
1682
1683    rp->verify_no_references_recorded();
1684
1685    pt.print_enqueue_phase();
1686
1687    assert(!rp->discovery_enabled(), "Post condition");
1688  }
1689
1690  if (has_overflown()) {
1691    // We can not trust g1_is_alive if the marking stack overflowed
1692    return;
1693  }
1694
1695  assert(_global_mark_stack.is_empty(), "Marking should have completed");
1696
1697  // Unload Klasses, String, Symbols, Code Cache, etc.
1698  if (ClassUnloadingWithConcurrentMark) {
1699    GCTraceTime(Debug, gc, phases) debug("Class Unloading", _gc_timer_cm);
1700    bool purged_classes = SystemDictionary::do_unloading(&g1_is_alive, _gc_timer_cm, false /* Defer cleaning */);
1701    g1h->complete_cleaning(&g1_is_alive, purged_classes);
1702  } else {
1703    GCTraceTime(Debug, gc, phases) debug("Cleanup", _gc_timer_cm);
1704    // No need to clean string table and symbol table as they are treated as strong roots when
1705    // class unloading is disabled.
1706    g1h->partial_cleaning(&g1_is_alive, false, false, G1StringDedup::is_enabled());
1707
1708  }
1709}
1710
1711void G1ConcurrentMark::swapMarkBitMaps() {
1712  G1CMBitMap* temp = _prevMarkBitMap;
1713  _prevMarkBitMap  = _nextMarkBitMap;
1714  _nextMarkBitMap  = temp;
1715}
1716
1717// Closure for marking entries in SATB buffers.
1718class G1CMSATBBufferClosure : public SATBBufferClosure {
1719private:
1720  G1CMTask* _task;
1721  G1CollectedHeap* _g1h;
1722
1723  // This is very similar to G1CMTask::deal_with_reference, but with
1724  // more relaxed requirements for the argument, so this must be more
1725  // circumspect about treating the argument as an object.
1726  void do_entry(void* entry) const {
1727    _task->increment_refs_reached();
1728    oop const obj = static_cast<oop>(entry);
1729    _task->make_reference_grey(obj);
1730  }
1731
1732public:
1733  G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1734    : _task(task), _g1h(g1h) { }
1735
1736  virtual void do_buffer(void** buffer, size_t size) {
1737    for (size_t i = 0; i < size; ++i) {
1738      do_entry(buffer[i]);
1739    }
1740  }
1741};
1742
1743class G1RemarkThreadsClosure : public ThreadClosure {
1744  G1CMSATBBufferClosure _cm_satb_cl;
1745  G1CMOopClosure _cm_cl;
1746  MarkingCodeBlobClosure _code_cl;
1747  int _thread_parity;
1748
1749 public:
1750  G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1751    _cm_satb_cl(task, g1h),
1752    _cm_cl(g1h, g1h->concurrent_mark(), task),
1753    _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
1754    _thread_parity(Threads::thread_claim_parity()) {}
1755
1756  void do_thread(Thread* thread) {
1757    if (thread->is_Java_thread()) {
1758      if (thread->claim_oops_do(true, _thread_parity)) {
1759        JavaThread* jt = (JavaThread*)thread;
1760
1761        // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
1762        // however, oops reachable from nmethods have very complex lifecycles:
1763        // * Alive if on the stack of an executing method
1764        // * Weakly reachable otherwise
1765        // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
1766        // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
1767        jt->nmethods_do(&_code_cl);
1768
1769        jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
1770      }
1771    } else if (thread->is_VM_thread()) {
1772      if (thread->claim_oops_do(true, _thread_parity)) {
1773        JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
1774      }
1775    }
1776  }
1777};
1778
1779class G1CMRemarkTask: public AbstractGangTask {
1780private:
1781  G1ConcurrentMark* _cm;
1782public:
1783  void work(uint worker_id) {
1784    // Since all available tasks are actually started, we should
1785    // only proceed if we're supposed to be active.
1786    if (worker_id < _cm->active_tasks()) {
1787      G1CMTask* task = _cm->task(worker_id);
1788      task->record_start_time();
1789      {
1790        ResourceMark rm;
1791        HandleMark hm;
1792
1793        G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1794        Threads::threads_do(&threads_f);
1795      }
1796
1797      do {
1798        task->do_marking_step(1000000000.0 /* something very large */,
1799                              true         /* do_termination       */,
1800                              false        /* is_serial            */);
1801      } while (task->has_aborted() && !_cm->has_overflown());
1802      // If we overflow, then we do not want to restart. We instead
1803      // want to abort remark and do concurrent marking again.
1804      task->record_end_time();
1805    }
1806  }
1807
1808  G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1809    AbstractGangTask("Par Remark"), _cm(cm) {
1810    _cm->terminator()->reset_for_reuse(active_workers);
1811  }
1812};
1813
1814void G1ConcurrentMark::checkpointRootsFinalWork() {
1815  ResourceMark rm;
1816  HandleMark   hm;
1817  G1CollectedHeap* g1h = G1CollectedHeap::heap();
1818
1819  GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm);
1820
1821  g1h->ensure_parsability(false);
1822
1823  // this is remark, so we'll use up all active threads
1824  uint active_workers = g1h->workers()->active_workers();
1825  set_concurrency_and_phase(active_workers, false /* concurrent */);
1826  // Leave _parallel_marking_threads at its
1827  // value originally calculated in the G1ConcurrentMark
1828  // constructor and pass the values of the active workers
1829  // through the gang in the task.
1830
1831  {
1832    StrongRootsScope srs(active_workers);
1833
1834    G1CMRemarkTask remarkTask(this, active_workers);
1835    // We will start all available threads, even if we decide that the
1836    // active_workers will be fewer. The extra ones will just bail out
1837    // immediately.
1838    g1h->workers()->run_task(&remarkTask);
1839  }
1840
1841  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1842  guarantee(has_overflown() ||
1843            satb_mq_set.completed_buffers_num() == 0,
1844            "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT,
1845            BOOL_TO_STR(has_overflown()),
1846            satb_mq_set.completed_buffers_num());
1847
1848  print_stats();
1849}
1850
1851void G1ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
1852  _prevMarkBitMap->clear_range(mr);
1853}
1854
1855HeapRegion*
1856G1ConcurrentMark::claim_region(uint worker_id) {
1857  // "checkpoint" the finger
1858  HeapWord* finger = _finger;
1859
1860  // _heap_end will not change underneath our feet; it only changes at
1861  // yield points.
1862  while (finger < _heap_end) {
1863    assert(_g1h->is_in_g1_reserved(finger), "invariant");
1864
1865    HeapRegion* curr_region = _g1h->heap_region_containing(finger);
1866    // Make sure that the reads below do not float before loading curr_region.
1867    OrderAccess::loadload();
1868    // heap_region_containing() above may return NULL as we always claim regions
1869    // up to the end of the heap. In this case, just jump to the next region.
1870    HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
1871
1872    // Is the gap between reading the finger and doing the CAS too long?
1873    HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
1874    if (res == finger && curr_region != NULL) {
1875      // we succeeded
1876      HeapWord*   bottom        = curr_region->bottom();
1877      HeapWord*   limit         = curr_region->next_top_at_mark_start();
1878
1879      // Notice that _finger == end cannot be guaranteed here since
1880      // someone else might have moved the finger even further.
1881      assert(_finger >= end, "the finger should have moved forward");
1882
1883      if (limit > bottom) {
1884        return curr_region;
1885      } else {
1886        assert(limit == bottom,
1887               "the region limit should be at bottom");
1888        // we return NULL and the caller should try calling
1889        // claim_region() again.
1890        return NULL;
1891      }
1892    } else {
1893      assert(_finger > finger, "the finger should have moved forward");
1894      // read it again
1895      finger = _finger;
1896    }
1897  }
1898
1899  return NULL;
1900}
1901
1902#ifndef PRODUCT
1903class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
1904private:
1905  G1CollectedHeap* _g1h;
1906  const char* _phase;
1907  int _info;
1908
1909public:
1910  VerifyNoCSetOops(const char* phase, int info = -1) :
1911    _g1h(G1CollectedHeap::heap()),
1912    _phase(phase),
1913    _info(info)
1914  { }
1915
1916  void operator()(G1TaskQueueEntry task_entry) const {
1917    if (task_entry.is_array_slice()) {
1918      guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice()));
1919      return;
1920    }
1921    guarantee(oopDesc::is_oop(task_entry.obj()),
1922              "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
1923              p2i(task_entry.obj()), _phase, _info);
1924    guarantee(!_g1h->is_in_cset(task_entry.obj()),
1925              "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
1926              p2i(task_entry.obj()), _phase, _info);
1927  }
1928};
1929
1930void G1ConcurrentMark::verify_no_cset_oops() {
1931  assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
1932  if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
1933    return;
1934  }
1935
1936  // Verify entries on the global mark stack
1937  _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
1938
1939  // Verify entries on the task queues
1940  for (uint i = 0; i < _max_worker_id; ++i) {
1941    G1CMTaskQueue* queue = _task_queues->queue(i);
1942    queue->iterate(VerifyNoCSetOops("Queue", i));
1943  }
1944
1945  // Verify the global finger
1946  HeapWord* global_finger = finger();
1947  if (global_finger != NULL && global_finger < _heap_end) {
1948    // Since we always iterate over all regions, we might get a NULL HeapRegion
1949    // here.
1950    HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
1951    guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
1952              "global finger: " PTR_FORMAT " region: " HR_FORMAT,
1953              p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
1954  }
1955
1956  // Verify the task fingers
1957  assert(parallel_marking_threads() <= _max_worker_id, "sanity");
1958  for (uint i = 0; i < parallel_marking_threads(); ++i) {
1959    G1CMTask* task = _tasks[i];
1960    HeapWord* task_finger = task->finger();
1961    if (task_finger != NULL && task_finger < _heap_end) {
1962      // See above note on the global finger verification.
1963      HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
1964      guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
1965                !task_hr->in_collection_set(),
1966                "task finger: " PTR_FORMAT " region: " HR_FORMAT,
1967                p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
1968    }
1969  }
1970}
1971#endif // PRODUCT
1972void G1ConcurrentMark::create_live_data() {
1973  _g1h->g1_rem_set()->create_card_live_data(_parallel_workers, _nextMarkBitMap);
1974}
1975
1976void G1ConcurrentMark::finalize_live_data() {
1977  _g1h->g1_rem_set()->finalize_card_live_data(_g1h->workers(), _nextMarkBitMap);
1978}
1979
1980void G1ConcurrentMark::verify_live_data() {
1981  _g1h->g1_rem_set()->verify_card_live_data(_g1h->workers(), _nextMarkBitMap);
1982}
1983
1984void G1ConcurrentMark::clear_live_data(WorkGang* workers) {
1985  _g1h->g1_rem_set()->clear_card_live_data(workers);
1986}
1987
1988#ifdef ASSERT
1989void G1ConcurrentMark::verify_live_data_clear() {
1990  _g1h->g1_rem_set()->verify_card_live_data_is_clear();
1991}
1992#endif
1993
1994void G1ConcurrentMark::print_stats() {
1995  if (!log_is_enabled(Debug, gc, stats)) {
1996    return;
1997  }
1998  log_debug(gc, stats)("---------------------------------------------------------------------");
1999  for (size_t i = 0; i < _active_tasks; ++i) {
2000    _tasks[i]->print_stats();
2001    log_debug(gc, stats)("---------------------------------------------------------------------");
2002  }
2003}
2004
2005void G1ConcurrentMark::abort() {
2006  if (!cmThread()->during_cycle() || _has_aborted) {
2007    // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
2008    return;
2009  }
2010
2011  // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2012  // concurrent bitmap clearing.
2013  {
2014    GCTraceTime(Debug, gc) debug("Clear Next Bitmap");
2015    clear_bitmap(_nextMarkBitMap, _g1h->workers(), false);
2016  }
2017  // Note we cannot clear the previous marking bitmap here
2018  // since VerifyDuringGC verifies the objects marked during
2019  // a full GC against the previous bitmap.
2020
2021  {
2022    GCTraceTime(Debug, gc) debug("Clear Live Data");
2023    clear_live_data(_g1h->workers());
2024  }
2025  DEBUG_ONLY({
2026    GCTraceTime(Debug, gc) debug("Verify Live Data Clear");
2027    verify_live_data_clear();
2028  })
2029  // Empty mark stack
2030  reset_marking_state();
2031  for (uint i = 0; i < _max_worker_id; ++i) {
2032    _tasks[i]->clear_region_fields();
2033  }
2034  _first_overflow_barrier_sync.abort();
2035  _second_overflow_barrier_sync.abort();
2036  _has_aborted = true;
2037
2038  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2039  satb_mq_set.abandon_partial_marking();
2040  // This can be called either during or outside marking; we'll read
2041  // the expected_active value from the SATB queue set.
2042  satb_mq_set.set_active_all_threads(
2043                                 false, /* new active value */
2044                                 satb_mq_set.is_active() /* expected_active */);
2045}
2046
2047static void print_ms_time_info(const char* prefix, const char* name,
2048                               NumberSeq& ns) {
2049  log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2050                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2051  if (ns.num() > 0) {
2052    log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2053                           prefix, ns.sd(), ns.maximum());
2054  }
2055}
2056
2057void G1ConcurrentMark::print_summary_info() {
2058  Log(gc, marking) log;
2059  if (!log.is_trace()) {
2060    return;
2061  }
2062
2063  log.trace(" Concurrent marking:");
2064  print_ms_time_info("  ", "init marks", _init_times);
2065  print_ms_time_info("  ", "remarks", _remark_times);
2066  {
2067    print_ms_time_info("     ", "final marks", _remark_mark_times);
2068    print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2069
2070  }
2071  print_ms_time_info("  ", "cleanups", _cleanup_times);
2072  log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2073            _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2074  if (G1ScrubRemSets) {
2075    log.trace("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2076              _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2077  }
2078  log.trace("  Total stop_world time = %8.2f s.",
2079            (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
2080  log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
2081            cmThread()->vtime_accum(), cmThread()->vtime_mark_accum());
2082}
2083
2084void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2085  _parallel_workers->print_worker_threads_on(st);
2086}
2087
2088void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
2089  _parallel_workers->threads_do(tc);
2090}
2091
2092void G1ConcurrentMark::print_on_error(outputStream* st) const {
2093  st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
2094      p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
2095  _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
2096  _nextMarkBitMap->print_on_error(st, " Next Bits: ");
2097}
2098
2099static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2100  ReferenceProcessor* result = g1h->ref_processor_cm();
2101  assert(result != NULL, "CM reference processor should not be NULL");
2102  return result;
2103}
2104
2105G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2106                               G1ConcurrentMark* cm,
2107                               G1CMTask* task)
2108  : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2109    _g1h(g1h), _cm(cm), _task(task)
2110{ }
2111
2112void G1CMTask::setup_for_region(HeapRegion* hr) {
2113  assert(hr != NULL,
2114        "claim_region() should have filtered out NULL regions");
2115  _curr_region  = hr;
2116  _finger       = hr->bottom();
2117  update_region_limit();
2118}
2119
2120void G1CMTask::update_region_limit() {
2121  HeapRegion* hr            = _curr_region;
2122  HeapWord* bottom          = hr->bottom();
2123  HeapWord* limit           = hr->next_top_at_mark_start();
2124
2125  if (limit == bottom) {
2126    // The region was collected underneath our feet.
2127    // We set the finger to bottom to ensure that the bitmap
2128    // iteration that will follow this will not do anything.
2129    // (this is not a condition that holds when we set the region up,
2130    // as the region is not supposed to be empty in the first place)
2131    _finger = bottom;
2132  } else if (limit >= _region_limit) {
2133    assert(limit >= _finger, "peace of mind");
2134  } else {
2135    assert(limit < _region_limit, "only way to get here");
2136    // This can happen under some pretty unusual circumstances.  An
2137    // evacuation pause empties the region underneath our feet (NTAMS
2138    // at bottom). We then do some allocation in the region (NTAMS
2139    // stays at bottom), followed by the region being used as a GC
2140    // alloc region (NTAMS will move to top() and the objects
2141    // originally below it will be grayed). All objects now marked in
2142    // the region are explicitly grayed, if below the global finger,
2143    // and in fact we do not need to scan anything else. So, we simply
2144    // set _finger to be limit to ensure that the bitmap iteration
2145    // doesn't do anything.
2146    _finger = limit;
2147  }
2148
2149  _region_limit = limit;
2150}
2151
2152void G1CMTask::giveup_current_region() {
2153  assert(_curr_region != NULL, "invariant");
2154  clear_region_fields();
2155}
2156
2157void G1CMTask::clear_region_fields() {
2158  // Values for these three fields that indicate that we're not
2159  // holding on to a region.
2160  _curr_region   = NULL;
2161  _finger        = NULL;
2162  _region_limit  = NULL;
2163}
2164
2165void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2166  if (cm_oop_closure == NULL) {
2167    assert(_cm_oop_closure != NULL, "invariant");
2168  } else {
2169    assert(_cm_oop_closure == NULL, "invariant");
2170  }
2171  _cm_oop_closure = cm_oop_closure;
2172}
2173
2174void G1CMTask::reset(G1CMBitMap* nextMarkBitMap) {
2175  guarantee(nextMarkBitMap != NULL, "invariant");
2176  _nextMarkBitMap                = nextMarkBitMap;
2177  clear_region_fields();
2178
2179  _calls                         = 0;
2180  _elapsed_time_ms               = 0.0;
2181  _termination_time_ms           = 0.0;
2182  _termination_start_time_ms     = 0.0;
2183}
2184
2185bool G1CMTask::should_exit_termination() {
2186  regular_clock_call();
2187  // This is called when we are in the termination protocol. We should
2188  // quit if, for some reason, this task wants to abort or the global
2189  // stack is not empty (this means that we can get work from it).
2190  return !_cm->mark_stack_empty() || has_aborted();
2191}
2192
2193void G1CMTask::reached_limit() {
2194  assert(_words_scanned >= _words_scanned_limit ||
2195         _refs_reached >= _refs_reached_limit ,
2196         "shouldn't have been called otherwise");
2197  regular_clock_call();
2198}
2199
2200void G1CMTask::regular_clock_call() {
2201  if (has_aborted()) return;
2202
2203  // First, we need to recalculate the words scanned and refs reached
2204  // limits for the next clock call.
2205  recalculate_limits();
2206
2207  // During the regular clock call we do the following
2208
2209  // (1) If an overflow has been flagged, then we abort.
2210  if (_cm->has_overflown()) {
2211    set_has_aborted();
2212    return;
2213  }
2214
2215  // If we are not concurrent (i.e. we're doing remark) we don't need
2216  // to check anything else. The other steps are only needed during
2217  // the concurrent marking phase.
2218  if (!concurrent()) return;
2219
2220  // (2) If marking has been aborted for Full GC, then we also abort.
2221  if (_cm->has_aborted()) {
2222    set_has_aborted();
2223    return;
2224  }
2225
2226  double curr_time_ms = os::elapsedVTime() * 1000.0;
2227
2228  // (3) We check whether we should yield. If we have to, then we abort.
2229  if (SuspendibleThreadSet::should_yield()) {
2230    // We should yield. To do this we abort the task. The caller is
2231    // responsible for yielding.
2232    set_has_aborted();
2233    return;
2234  }
2235
2236  // (4) We check whether we've reached our time quota. If we have,
2237  // then we abort.
2238  double elapsed_time_ms = curr_time_ms - _start_time_ms;
2239  if (elapsed_time_ms > _time_target_ms) {
2240    set_has_aborted();
2241    _has_timed_out = true;
2242    return;
2243  }
2244
2245  // (5) Finally, we check whether there are enough completed SATB
2246  // buffers available for processing. If there are, we abort.
2247  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2248  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2249    // We do need to process SATB buffers, so we'll abort and restart
2250    // the marking task to do so.
2251    set_has_aborted();
2252    return;
2253  }
2254}
2255
2256void G1CMTask::recalculate_limits() {
2257  _real_words_scanned_limit = _words_scanned + words_scanned_period;
2258  _words_scanned_limit      = _real_words_scanned_limit;
2259
2260  _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2261  _refs_reached_limit       = _real_refs_reached_limit;
2262}
2263
2264void G1CMTask::decrease_limits() {
2265  // This is called when we believe that we're going to do an infrequent
2266  // operation which will increase the per byte scanned cost (i.e. move
2267  // entries to/from the global stack). It basically tries to decrease the
2268  // scanning limit so that the clock is called earlier.
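  //
  // For example (illustrative): _real_words_scanned_limit was last set to
  // (words scanned at the last recalculation + words_scanned_period), so after
  // this adjustment at most about a quarter of that period remains before
  // reached_limit() triggers the next regular_clock_call().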
2269
2270  _words_scanned_limit = _real_words_scanned_limit -
2271    3 * words_scanned_period / 4;
2272  _refs_reached_limit  = _real_refs_reached_limit -
2273    3 * refs_reached_period / 4;
2274}
2275
2276void G1CMTask::move_entries_to_global_stack() {
2277  // Local array where we'll store the entries that will be popped
2278  // from the local queue.
2279  G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2280
2281  size_t n = 0;
2282  G1TaskQueueEntry task_entry;
2283  while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2284    buffer[n] = task_entry;
2285    ++n;
2286  }
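  // If the chunk is not completely filled, terminate it with an empty (null)
  // entry so that get_entries_from_global_stack() knows where the valid
  // entries end.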
2287  if (n < G1CMMarkStack::EntriesPerChunk) {
2288    buffer[n] = G1TaskQueueEntry();
2289  }
2290
2291  if (n > 0) {
2292    if (!_cm->mark_stack_push(buffer)) {
2293      set_has_aborted();
2294    }
2295  }
2296
2297  // This operation was quite expensive, so decrease the limits.
2298  decrease_limits();
2299}
2300
2301bool G1CMTask::get_entries_from_global_stack() {
2302  // Local array where we'll store the entries that will be popped
2303  // from the global stack.
2304  G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2305
2306  if (!_cm->mark_stack_pop(buffer)) {
2307    return false;
2308  }
2309
2310  // We did actually pop at least one entry.
2311  for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2312    G1TaskQueueEntry task_entry = buffer[i];
2313    if (task_entry.is_null()) {
2314      break;
2315    }
2316    assert(task_entry.is_array_slice() || oopDesc::is_oop(task_entry.obj()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj()));
2317    bool success = _task_queue->push(task_entry);
2318    // We only call this when the local queue is empty or under a
2319    // given target limit. So, we do not expect this push to fail.
2320    assert(success, "invariant");
2321  }
2322
2323  // This operation was quite expensive, so decrease the limits
2324  decrease_limits();
2325  return true;
2326}
2327
2328void G1CMTask::drain_local_queue(bool partially) {
2329  if (has_aborted()) {
2330    return;
2331  }
2332
2333  // Decide what the target size is, depending on whether we're going to
2334  // drain it partially (so that other tasks can steal if they run out
2335  // of things to do) or totally (at the very end).
2336  size_t target_size;
2337  if (partially) {
2338    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2339  } else {
2340    target_size = 0;
2341  }
2342
2343  if (_task_queue->size() > target_size) {
2344    G1TaskQueueEntry entry;
2345    bool ret = _task_queue->pop_local(entry);
2346    while (ret) {
2347      scan_task_entry(entry);
2348      if (_task_queue->size() <= target_size || has_aborted()) {
2349        ret = false;
2350      } else {
2351        ret = _task_queue->pop_local(entry);
2352      }
2353    }
2354  }
2355}
2356
2357void G1CMTask::drain_global_stack(bool partially) {
2358  if (has_aborted()) return;
2359
2360  // We have a policy to drain the local queue before we attempt to
2361  // drain the global stack.
2362  assert(partially || _task_queue->size() == 0, "invariant");
2363
2364  // Decide what the target size is, depending on whether we're going to
2365  // drain it partially (so that other tasks can steal if they run out
2366  // of things to do) or totally (at the very end).
2367  // Notice that when draining the global mark stack partially, due to the raciness
2368  // of the mark stack size update we might in fact drop below the target. But,
2369  // this is not a problem.
2370  // In case of total draining, we simply process until the global mark stack is
2371  // totally empty, disregarding the size counter.
2372  if (partially) {
2373    size_t const target_size = _cm->partial_mark_stack_size_target();
2374    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2375      if (get_entries_from_global_stack()) {
2376        drain_local_queue(partially);
2377      }
2378    }
2379  } else {
2380    while (!has_aborted() && get_entries_from_global_stack()) {
2381      drain_local_queue(partially);
2382    }
2383  }
2384}
2385
2386// The SATB queue has several assumptions on whether to call the par or
2387// non-par versions of the methods. This is why some of the code is
2388// replicated. We should really get rid of the single-threaded version
2389// of the code to simplify things.
2390void G1CMTask::drain_satb_buffers() {
2391  if (has_aborted()) return;
2392
2393  // We set this so that the regular clock knows that we're in the
2394  // middle of draining buffers and doesn't set the abort flag when it
2395  // notices that SATB buffers are available for draining. It'd be
2396  // very counterproductive if it did that. :-)
2397  _draining_satb_buffers = true;
2398
2399  G1CMSATBBufferClosure satb_cl(this, _g1h);
2400  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2401
2402  // This keeps claiming and applying the closure to completed buffers
2403  // until we run out of buffers or we need to abort.
2404  while (!has_aborted() &&
2405         satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2406    regular_clock_call();
2407  }
2408
2409  _draining_satb_buffers = false;
2410
2411  assert(has_aborted() ||
2412         concurrent() ||
2413         satb_mq_set.completed_buffers_num() == 0, "invariant");
2414
2415  // Again, this was a potentially expensive operation, so decrease the
2416  // limits to get the regular clock call early.
2417  decrease_limits();
2418}
2419
2420void G1CMTask::print_stats() {
2421  log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
2422                       _worker_id, _calls);
2423  log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2424                       _elapsed_time_ms, _termination_time_ms);
2425  log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
2426                       _step_times_ms.num(), _step_times_ms.avg(),
2427                       _step_times_ms.sd());
2428  log_debug(gc, stats)("                    max = %1.2lfms, total = %1.2lfms",
2429                       _step_times_ms.maximum(), _step_times_ms.sum());
2430}
2431
2432bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) {
2433  return _task_queues->steal(worker_id, hash_seed, task_entry);
2434}
2435
2436/*****************************************************************************
2437
2438    The do_marking_step(time_target_ms, ...) method is the building
2439    block of the parallel marking framework. It can be called in parallel
2440    with other invocations of do_marking_step() on different tasks
2441    (but only one per task, obviously) and concurrently with the
2442    mutator threads, or during remark, hence it eliminates the need
2443    for two versions of the code. When called during remark, it will
2444    pick up from where the task left off during the concurrent marking
2445    phase. Interestingly, tasks are also claimable during evacuation
2446    pauses, since do_marking_step() ensures that it aborts before
2447    it needs to yield.
2448
2449    The data structures that it uses to do marking work are the
2450    following:
2451
2452      (1) Marking Bitmap. If there are gray objects that appear only
2453      on the bitmap (this happens either when dealing with an overflow
2454      or when the initial marking phase has simply marked the roots
2455      and didn't push them on the stack), then tasks claim heap
2456      regions whose bitmap they then scan to find gray objects. A
2457      global finger indicates where the end of the last claimed region
2458      is. A local finger indicates how far into the region a task has
2459      scanned. The two fingers are used to determine how to gray an
2460      object (i.e. whether simply marking it is OK, as it will be
2461      visited by a task in the future, or whether it also needs to be
2462      pushed on a stack).
2463
2464      (2) Local Queue. Each task has a local queue, which it can access
2465      reasonably efficiently. Other tasks can steal from
2466      it when they run out of work. Throughout the marking phase, a
2467      task attempts to keep its local queue short but not totally
2468      empty, so that entries are available for stealing by other
2469      tasks. Only when there is no more work will a task totally
2470      drain its local queue.
2471
2472      (3) Global Mark Stack. This handles local queue overflow. During
2473      marking, entries are only moved between it and the local queues
2474      in sets, as access to it requires a mutex and more fine-grained
2475      interaction with it might cause contention. If it
2476      overflows, then the marking phase should restart and iterate
2477      over the bitmap to identify gray objects. Throughout the marking
2478      phase, tasks attempt to keep the global mark stack at a small
2479      length but not totally empty, so that entries are available for
2480      popping by other tasks. Only when there is no more work will
2481      tasks totally drain the global mark stack.
2482
2483      (4) SATB Buffer Queue. This is where completed SATB buffers are
2484      made available. Buffers are regularly removed from this queue
2485      and scanned for roots, so that the queue doesn't get too
2486      long. During remark, all completed buffers are processed, as
2487      well as the filled in parts of any uncompleted buffers.
2488
2489    The do_marking_step() method tries to abort when the time target
2490    has been reached. There are a few other cases when the
2491    do_marking_step() method also aborts:
2492
2493      (1) When the marking phase has been aborted (after a Full GC).
2494
2495      (2) When a global overflow (on the global stack) has been
2496      triggered. Before the task aborts, it will actually sync up with
2497      the other tasks to ensure that all the marking data structures
2498      (local queues, stacks, fingers etc.)  are re-initialized so that
2499      when do_marking_step() completes, the marking phase can
2500      immediately restart.
2501
2502      (3) When enough completed SATB buffers are available. The
2503      do_marking_step() method only tries to drain SATB buffers right
2504      at the beginning. So, if enough buffers are available, the
2505      marking step aborts and the SATB buffers are processed at
2506      the beginning of the next invocation.
2507
2508      (4) To yield. When we have to yield then we abort and yield
2509      right at the end of do_marking_step(). This saves us from a lot
2510      of hassle as, by yielding, we might allow a Full GC. If this
2511      happens then objects will be compacted underneath our feet, the
2512      heap might shrink, etc. We avoid having to check for this by just
2513      aborting and doing the yield right at the end.
2514
2515    From the above it follows that the do_marking_step() method should
2516    be called in a loop (or, otherwise, regularly) until it completes.
2517
2518    If a marking step completes without its has_aborted() flag being
2519    true, it means it has completed the current marking phase (and
2520    also all other marking tasks have done so and have all synced up).
2521
2522    A method called regular_clock_call() is invoked "regularly" (in
2523    sub ms intervals) throughout marking. It is this clock method that
2524    checks all the abort conditions which were mentioned above and
2525    decides when the task should abort. A work-based scheme is used to
2526    trigger this clock method: when the number of object words the
2527    marking phase has scanned or the number of references the marking
2528    phase has visited reaches a given limit. Additional invocations of
2529    the clock method have been planted in a few other strategic places
2530    too. The initial reason for the clock method was to avoid calling
2531    vtime too regularly, as it is quite expensive. So, once it was in
2532    place, it was natural to piggy-back all the other conditions on it
2533    too and not constantly check them throughout the code.
2534
2535    If do_termination is true then do_marking_step will enter its
2536    termination protocol.
2537
2538    The value of is_serial must be true when do_marking_step is being
2539    called serially (i.e. by the VMThread) and do_marking_step should
2540    skip any synchronization in the termination and overflow code.
2541    Examples include the serial remark code and the serial reference
2542    processing closures.
2543
2544    The value of is_serial must be false when do_marking_step is
2545    being called by any of the worker threads in a work gang.
2546    Examples include the concurrent marking code (CMMarkingTask),
2547    the MT remark code, and the MT reference processing closures.
2548
2549 *****************************************************************************/
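//
// For illustration, the callers in this file invoke do_marking_step() in a
// retry loop of roughly the following shape (a sketch only; see e.g.
// G1CMRemarkTask::work() and G1CMDrainMarkingStackClosure::do_void() for the
// real call sites):
//
//   do {
//     task->do_marking_step(time_target_ms, do_termination, is_serial);
//   } while (task->has_aborted() && !cm->has_overflown());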
2550
2551void G1CMTask::do_marking_step(double time_target_ms,
2552                               bool do_termination,
2553                               bool is_serial) {
2554  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2555  assert(concurrent() == _cm->concurrent(), "they should be the same");
2556
2557  G1Policy* g1_policy = _g1h->g1_policy();
2558  assert(_task_queues != NULL, "invariant");
2559  assert(_task_queue != NULL, "invariant");
2560  assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
2561
2562  assert(!_claimed,
2563         "only one thread should claim this task at any one time");
2564
2565  // OK, this doesn't safeguard against all possible scenarios, as it is
2566  // possible for two threads to set the _claimed flag at the same
2567  // time. But it is only for debugging purposes anyway and it will
2568  // catch most problems.
2569  _claimed = true;
2570
2571  _start_time_ms = os::elapsedVTime() * 1000.0;
2572
2573  // If do_stealing is true then do_marking_step will attempt to
2574  // steal work from the other G1CMTasks. It only makes sense to
2575  // enable stealing when the termination protocol is enabled
2576  // and do_marking_step() is not being called serially.
2577  bool do_stealing = do_termination && !is_serial;
2578
2579  double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
2580  _time_target_ms = time_target_ms - diff_prediction_ms;
2581
2582  // set up the variables that are used in the work-based scheme to
2583  // call the regular clock method
2584  _words_scanned = 0;
2585  _refs_reached  = 0;
2586  recalculate_limits();
2587
2588  // clear all flags
2589  clear_has_aborted();
2590  _has_timed_out = false;
2591  _draining_satb_buffers = false;
2592
2593  ++_calls;
2594
2595  // Set up the bitmap and oop closures. Anything that uses them is
2596  // eventually called from this method, so it is OK to allocate them
2597  // here, on the stack.
2598  G1CMBitMapClosure bitmap_closure(this, _cm);
2599  G1CMOopClosure    cm_oop_closure(_g1h, _cm, this);
2600  set_cm_oop_closure(&cm_oop_closure);
2601
2602  if (_cm->has_overflown()) {
2603    // This can happen if the mark stack overflows during a GC pause
2604    // and this task, after a yield point, restarts. We have to abort
2605    // as we need to get into the overflow protocol which happens
2606    // right at the end of this task.
2607    set_has_aborted();
2608  }
2609
2610  // First drain any available SATB buffers. After this, we will not
2611  // look at SATB buffers before the next invocation of this method.
2612  // If enough completed SATB buffers are queued up, the regular clock
2613  // will abort this task so that it restarts.
2614  drain_satb_buffers();
2615  // ...then partially drain the local queue and the global stack
2616  drain_local_queue(true);
2617  drain_global_stack(true);
2618
2619  do {
2620    if (!has_aborted() && _curr_region != NULL) {
2621      // This means that we're already holding on to a region.
2622      assert(_finger != NULL, "if region is not NULL, then the finger "
2623             "should not be NULL either");
2624
2625      // We might have restarted this task after an evacuation pause
2626      // which might have evacuated the region we're holding on to
2627      // underneath our feet. Let's read its limit again to make sure
2628      // that we do not iterate over a region of the heap that
2629      // contains garbage (update_region_limit() will also move
2630      // _finger to the start of the region if it is found empty).
2631      update_region_limit();
2632      // We will start from _finger not from the start of the region,
2633      // as we might be restarting this task after aborting half-way
2634      // through scanning this region. In this case, _finger points to
2635      // the address where we last found a marked object. If this is a
2636      // fresh region, _finger points to start().
2637      MemRegion mr = MemRegion(_finger, _region_limit);
2638
2639      assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2640             "humongous regions should go around loop once only");
2641
2642      // Some special cases:
2643      // If the memory region is empty, we can just give up the region.
2644      // If the current region is humongous then we only need to check
2645      // the bitmap for the bit associated with the start of the object,
2646      // scan the object if it's live, and give up the region.
2647      // Otherwise, let's iterate over the bitmap of the part of the region
2648      // that is left.
2649      // If the iteration is successful, give up the region.
2650      if (mr.is_empty()) {
2651        giveup_current_region();
2652        regular_clock_call();
2653      } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2654        if (_nextMarkBitMap->is_marked(mr.start())) {
2655          // The object is marked - apply the closure
2656          bitmap_closure.do_addr(mr.start());
2657        }
2658        // Even if this task aborted while scanning the humongous object
2659        // we can (and should) give up the current region.
2660        giveup_current_region();
2661        regular_clock_call();
2662      } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
2663        giveup_current_region();
2664        regular_clock_call();
2665      } else {
2666        assert(has_aborted(), "currently the only way to do so");
2667        // The only way to abort the bitmap iteration is to return
2668        // false from the do_addr() method. However, inside the
2669        // do_addr() method we move the _finger to point to the
2670        // object currently being looked at. So, if we bail out, we
2671        // have definitely set _finger to something non-null.
2672        assert(_finger != NULL, "invariant");
2673
2674        // Region iteration was actually aborted. So now _finger
2675        // points to the address of the object we last scanned. If we
2676        // leave it there, when we restart this task, we will rescan
2677        // the object. It is easy to avoid this. We move the finger by
2678        // enough to point to the next possible object header.
2679        assert(_finger < _region_limit, "invariant");
2680        HeapWord* const new_finger = _finger + ((oop)_finger)->size();
2681        // Check if bitmap iteration was aborted while scanning the last object
2682        if (new_finger >= _region_limit) {
2683          giveup_current_region();
2684        } else {
2685          move_finger_to(new_finger);
2686        }
2687      }
2688    }
2689    // At this point we have either completed iterating over the
2690    // region we were holding on to, or we have aborted.
2691
2692    // We then partially drain the local queue and the global stack.
2693    // (Do we really need this?)
2694    drain_local_queue(true);
2695    drain_global_stack(true);
2696
2697    // Read the note on the claim_region() method on why it might
2698    // return NULL with potentially more regions available for
2699    // claiming and why we have to check out_of_regions() to determine
2700    // whether we're done or not.
2701    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
2702      // We are going to try to claim a new region. We should have
2703      // given up on the previous one.
2704      // Separated the asserts so that we know which one fires.
2705      assert(_curr_region  == NULL, "invariant");
2706      assert(_finger       == NULL, "invariant");
2707      assert(_region_limit == NULL, "invariant");
2708      HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2709      if (claimed_region != NULL) {
2710        // Yes, we managed to claim one
2711        setup_for_region(claimed_region);
2712        assert(_curr_region == claimed_region, "invariant");
2713      }
2714      // It is important to call the regular clock here. It might take
2715      // a while to claim a region if, for example, we hit a large
2716      // block of empty regions. So we need to call the regular clock
2717      // method once round the loop to make sure it's called
2718      // frequently enough.
2719      regular_clock_call();
2720    }
2721
2722    if (!has_aborted() && _curr_region == NULL) {
2723      assert(_cm->out_of_regions(),
2724             "at this point we should be out of regions");
2725    }
2726  } while ( _curr_region != NULL && !has_aborted());
2727
2728  if (!has_aborted()) {
2729    // We cannot check whether the global stack is empty, since other
2730    // tasks might be pushing objects to it concurrently.
2731    assert(_cm->out_of_regions(),
2732           "at this point we should be out of regions");
2733    // Try to reduce the number of available SATB buffers so that
2734    // remark has less work to do.
2735    drain_satb_buffers();
2736  }
2737
2738  // Since we've done everything else, we can now totally drain the
2739  // local queue and global stack.
2740  drain_local_queue(false);
2741  drain_global_stack(false);
2742
2743  // Attempt at work stealing from other task's queues.
2744  if (do_stealing && !has_aborted()) {
2745    // We have not aborted. This means that we have finished all that
2746    // we could. Let's try to do some stealing...
2747
2748    // We cannot check whether the global stack is empty, since other
2749    // tasks might be pushing objects to it concurrently.
2750    assert(_cm->out_of_regions() && _task_queue->size() == 0,
2751           "only way to reach here");
2752    while (!has_aborted()) {
2753      G1TaskQueueEntry entry;
2754      if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) {
2755        scan_task_entry(entry);
2756
2757        // And since we're towards the end, let's totally drain the
2758        // local queue and global stack.
2759        drain_local_queue(false);
2760        drain_global_stack(false);
2761      } else {
2762        break;
2763      }
2764    }
2765  }
2766
2767  // We still haven't aborted. Now, let's try to get into the
2768  // termination protocol.
2769  if (do_termination && !has_aborted()) {
2770    // We cannot check whether the global stack is empty, since other
2771    // tasks might be concurrently pushing objects on it.
2772    // Separated the asserts so that we know which one fires.
2773    assert(_cm->out_of_regions(), "only way to reach here");
2774    assert(_task_queue->size() == 0, "only way to reach here");
    _termination_start_time_ms = os::elapsedVTime() * 1000.0;

    // The G1CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = (is_serial ||
                     _cm->terminator()->offer_termination(this));
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_worker_id == 0) {
        // Let's allow task 0 to do this.
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // We need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
    } else {
      // Apparently there's more work to do. Let's abort this task.
      // The code that invoked it will restart it and we can hopefully
      // find more things to do.
      set_has_aborted();
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.
    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialize in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (!is_serial) {
        // We only need to enter the sync barrier if being called
        // from a parallel context.
        _cm->enter_first_sync_barrier(_worker_id);

        // When we exit this sync barrier we know that all tasks have
        // stopped doing marking work. So, it's now safe to
        // re-initialize our data structures. At the end of this method,
        // task 0 will clear the global data structures.
      }

      // We clear the local state of this task...
      clear_region_fields();

      if (!is_serial) {
        // ...and enter the second barrier.
        _cm->enter_second_sync_barrier(_worker_id);
      }
      // At this point, if we're in the concurrent phase of
      // marking, everything has been re-initialized and we're
      // ready to restart.
    }
  }

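  // This marking step is finished; mark the task as no longer claimed.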
  _claimed = false;
}

G1CMTask::G1CMTask(uint worker_id,
                   G1ConcurrentMark* cm,
                   G1CMTaskQueue* task_queue,
                   G1CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _worker_id(worker_id), _cm(cm),
    _objArray_processor(this),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

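  // Seed the step timing diff statistics with an initial sample so that
  // the first predictions are not based on an empty sequence.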
  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary whitespace (as a prefix), which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX            "###"

#define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT            "   %-4s"
#define G1PPRL_TYPE_H_FORMAT          "   %4s"
#define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT          "  %9s"
#define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT        "  %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"

G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(const char* phase_name)
  : _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
                          G1PPRL_SUM_ADDR_FORMAT("reserved")
                          G1PPRL_SUM_BYTE_FORMAT("region-size"),
                          p2i(g1_reserved.start()), p2i(g1_reserved.end()),
                          HeapRegion::GrainBytes);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_DOUBLE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT,
                          "type", "address-range",
                          "used", "prev-live", "next-live", "gc-eff",
                          "remset", "code-roots");
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_DOUBLE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT,
                          "", "",
                          "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
                          "(bytes)", "(bytes)");
}
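
// A sketch of how this closure is typically driven (the actual call site
// in the marking code may differ in its details):
//
//   G1PrintRegionLivenessInfoClosure cl("Post-Marking");
//   G1CollectedHeap::heap()->heap_region_iterate(&cl);
//
// The constructor above prints the header, doHeapRegion() below prints one
// line per region, and the destructor prints the summary footer.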

bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type       = r->get_type_str();
  HeapWord* bottom       = r->bottom();
  HeapWord* end          = r->end();
  size_t capacity_bytes  = r->capacity();
  size_t used_bytes      = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff          = r->gc_efficiency();
  size_t remset_bytes    = r->rem_set()->mem_size();
  size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();

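  // Accumulate the per-region values into the totals that are printed
  // as a summary by the destructor.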
  _total_used_bytes      += used_bytes;
  _total_capacity_bytes  += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;
  _total_remset_bytes    += remset_bytes;
  _total_strong_code_roots_bytes += strong_code_roots_bytes;

  // Print a line for this particular region.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_FORMAT
                          G1PPRL_ADDR_BASE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_DOUBLE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT,
                          type, p2i(bottom), p2i(end),
                          used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
                          remset_bytes, strong_code_roots_bytes);

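  // Returning false tells the heap region iteration to continue with the
  // next region.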
  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Add the static memory usage of the remembered set data structures
  // to the remembered set total.
  _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
  // Print the footer of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                         " SUMMARY"
                         G1PPRL_SUM_MB_FORMAT("capacity")
                         G1PPRL_SUM_MB_PERC_FORMAT("used")
                         G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                         G1PPRL_SUM_MB_PERC_FORMAT("next-live")
                         G1PPRL_SUM_MB_FORMAT("remset")
                         G1PPRL_SUM_MB_FORMAT("code-roots"),
                         bytes_to_mb(_total_capacity_bytes),
                         bytes_to_mb(_total_used_bytes),
                         perc(_total_used_bytes, _total_capacity_bytes),
                         bytes_to_mb(_total_prev_live_bytes),
                         perc(_total_prev_live_bytes, _total_capacity_bytes),
                         bytes_to_mb(_total_next_live_bytes),
                         perc(_total_next_live_bytes, _total_capacity_bytes),
                         bytes_to_mb(_total_remset_bytes),
                         bytes_to_mb(_total_strong_code_roots_bytes));
}
