g1ConcurrentMark.cpp revision 13243:7235bc30c0d7
1/*
2 * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "classfile/metadataOnStackMark.hpp"
27#include "classfile/symbolTable.hpp"
28#include "code/codeCache.hpp"
29#include "gc/g1/concurrentMarkThread.inline.hpp"
30#include "gc/g1/g1CollectedHeap.inline.hpp"
31#include "gc/g1/g1CollectorState.hpp"
32#include "gc/g1/g1ConcurrentMark.inline.hpp"
33#include "gc/g1/g1HeapVerifier.hpp"
34#include "gc/g1/g1OopClosures.inline.hpp"
35#include "gc/g1/g1CardLiveData.inline.hpp"
36#include "gc/g1/g1Policy.hpp"
37#include "gc/g1/g1StringDedup.hpp"
38#include "gc/g1/heapRegion.inline.hpp"
39#include "gc/g1/heapRegionRemSet.hpp"
40#include "gc/g1/heapRegionSet.inline.hpp"
41#include "gc/g1/suspendibleThreadSet.hpp"
42#include "gc/shared/gcId.hpp"
43#include "gc/shared/gcTimer.hpp"
44#include "gc/shared/gcTrace.hpp"
45#include "gc/shared/gcTraceTime.inline.hpp"
46#include "gc/shared/genOopClosures.inline.hpp"
47#include "gc/shared/referencePolicy.hpp"
48#include "gc/shared/strongRootsScope.hpp"
49#include "gc/shared/taskqueue.inline.hpp"
50#include "gc/shared/vmGCOperations.hpp"
51#include "logging/log.hpp"
52#include "memory/allocation.hpp"
53#include "memory/resourceArea.hpp"
54#include "oops/oop.inline.hpp"
55#include "runtime/atomic.hpp"
56#include "runtime/handles.inline.hpp"
57#include "runtime/java.hpp"
58#include "runtime/prefetch.inline.hpp"
59#include "services/memTracker.hpp"
60#include "utilities/growableArray.hpp"
61
62// Concurrent marking bit map wrapper
63
64G1CMBitMapRO::G1CMBitMapRO(int shifter) :
65  _bm(),
66  _shifter(shifter) {
67  _bmStartWord = 0;
68  _bmWordSize = 0;
69}
70
71HeapWord* G1CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
72                                                 const HeapWord* limit) const {
73  // First we must round addr *up* to a possible object boundary.
74  addr = align_up(addr, HeapWordSize << _shifter);
75  size_t addrOffset = heapWordToOffset(addr);
76  assert(limit != NULL, "limit must not be NULL");
77  size_t limitOffset = heapWordToOffset(limit);
78  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
79  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
80  assert(nextAddr >= addr, "get_next_one postcondition");
81  assert(nextAddr == limit || isMarked(nextAddr),
82         "get_next_one postcondition");
83  return nextAddr;
84}
85
86#ifndef PRODUCT
87bool G1CMBitMapRO::covers(MemRegion heap_rs) const {
88  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
89  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
90         "size inconsistency");
91  return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
92         _bmWordSize  == heap_rs.word_size();
93}
94#endif
95
96void G1CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
97  _bm.print_on_error(st, prefix);
98}
99
100size_t G1CMBitMap::compute_size(size_t heap_size) {
101  return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
102}
103
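// mark_distance() is the number of heap bytes covered by one byte of the
// mark bitmap: each bit maps MinObjAlignmentInBytes heap bytes and a byte
// holds BitsPerByte bits (e.g. 8 * 8 = 64 heap bytes per bitmap byte with
// 8-byte minimum object alignment). compute_size() above divides the heap
// size by this factor to obtain the bitmap size in bytes.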
104size_t G1CMBitMap::mark_distance() {
105  return MinObjAlignmentInBytes * BitsPerByte;
106}
107
108void G1CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
109  _bmStartWord = heap.start();
110  _bmWordSize = heap.word_size();
111
112  _bm = BitMapView((BitMap::bm_word_t*) storage->reserved().start(), _bmWordSize >> _shifter);
113
114  storage->set_mapping_changed_listener(&_listener);
115}
116
117void G1CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
118  if (zero_filled) {
119    return;
120  }
121  // We need to clear the bitmap on commit, removing any existing information.
122  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
123  _bm->clear_range(mr);
124}
125
126void G1CMBitMap::clear_range(MemRegion mr) {
127  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
128  assert(!mr.is_empty(), "unexpected empty region");
129  // convert address range into offset range
130  _bm.at_put_range(heapWordToOffset(mr.start()),
131                   heapWordToOffset(mr.end()), false);
132}
133
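// The global mark stack is organized as an array of fixed-size chunks of
// G1TaskQueueEntry elements. Worker tasks move work to and from it a whole
// chunk at a time (par_push_chunk / par_pop_chunk below); chunks currently
// holding entries live on _chunk_list, and recycled chunks on _free_list.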
134G1CMMarkStack::G1CMMarkStack() :
135  _max_chunk_capacity(0),
136  _base(NULL),
137  _chunk_capacity(0) {
138  set_empty();
139}
140
141bool G1CMMarkStack::resize(size_t new_capacity) {
142  assert(is_empty(), "Only resize when stack is empty.");
143  assert(new_capacity <= _max_chunk_capacity,
144         "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);
145
146  TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk, mtGC>::allocate_or_null(new_capacity);
147
148  if (new_base == NULL) {
149    log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
150    return false;
151  }
152  // Release old mapping.
153  if (_base != NULL) {
154    MmapArrayAllocator<TaskQueueEntryChunk, mtGC>::free(_base, _chunk_capacity);
155  }
156
157  _base = new_base;
158  _chunk_capacity = new_capacity;
159  set_empty();
160
161  return true;
162}
163
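// The alignment, expressed in G1TaskQueueEntry elements, that mark stack
// capacities are rounded to: the least common multiple of the OS allocation
// granularity and the chunk size, so that an aligned capacity always maps to
// a whole number of chunks on allocation-granularity boundaries.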
164size_t G1CMMarkStack::capacity_alignment() {
165  return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
166}
167
168bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
169  guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");
170
171  size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);
172
173  _max_chunk_capacity = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
174  size_t initial_chunk_capacity = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
175
176  guarantee(initial_chunk_capacity <= _max_chunk_capacity,
177            "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
178            _max_chunk_capacity,
179            initial_chunk_capacity);
180
181  log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
182                initial_chunk_capacity, _max_chunk_capacity);
183
184  return resize(initial_chunk_capacity);
185}
186
187void G1CMMarkStack::expand() {
188  if (_chunk_capacity == _max_chunk_capacity) {
189    log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
190    return;
191  }
192  size_t old_capacity = _chunk_capacity;
193  // Double capacity if possible
194  size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);
195
196  if (resize(new_capacity)) {
197    log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
198                  old_capacity, new_capacity);
199  } else {
200    log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
201                    old_capacity, new_capacity);
202  }
203}
204
205G1CMMarkStack::~G1CMMarkStack() {
206  if (_base != NULL) {
207    MmapArrayAllocator<TaskQueueEntryChunk, mtGC>::free(_base, _chunk_capacity);
208  }
209}
210
211void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) {
212  elem->next = *list;
213  *list = elem;
214}
215
216void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
217  MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
218  add_chunk_to_list(&_chunk_list, elem);
219  _chunks_in_chunk_list++;
220}
221
222void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
223  MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
224  add_chunk_to_list(&_free_list, elem);
225}
226
227G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) {
228  TaskQueueEntryChunk* result = *list;
229  if (result != NULL) {
230    *list = (*list)->next;
231  }
232  return result;
233}
234
235G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
236  MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
237  TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
238  if (result != NULL) {
239    _chunks_in_chunk_list--;
240  }
241  return result;
242}
243
244G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
245  MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
246  return remove_chunk_from_list(&_free_list);
247}
248
249G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() {
250  // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
251  // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding
252  // wraparound of _hwm.
253  if (_hwm >= _chunk_capacity) {
254    return NULL;
255  }
256
257  size_t cur_idx = Atomic::add(1, &_hwm) - 1;
258  if (cur_idx >= _chunk_capacity) {
259    return NULL;
260  }
261
262  TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk;
263  result->next = NULL;
264  return result;
265}
266
267bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
268  // Get a new chunk.
269  TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
270
271  if (new_chunk == NULL) {
272    // Did not get a chunk from the free list. Allocate from backing memory.
273    new_chunk = allocate_new_chunk();
274
275    if (new_chunk == NULL) {
276      return false;
277    }
278  }
279
280  Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
281
282  add_chunk_to_chunk_list(new_chunk);
283
284  return true;
285}
286
287bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
288  TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
289
290  if (cur == NULL) {
291    return false;
292  }
293
294  Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
295
296  add_chunk_to_free_list(cur);
297  return true;
298}
299
300void G1CMMarkStack::set_empty() {
301  _chunks_in_chunk_list = 0;
302  _hwm = 0;
303  _chunk_list = NULL;
304  _free_list = NULL;
305}
306
307G1CMRootRegions::G1CMRootRegions() :
308  _cm(NULL), _scan_in_progress(false),
309  _should_abort(false), _claimed_survivor_index(0) { }
310
311void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
312  _survivors = survivors;
313  _cm = cm;
314}
315
316void G1CMRootRegions::prepare_for_scan() {
317  assert(!scan_in_progress(), "pre-condition");
318
319  // Currently, only survivors can be root regions.
320  _claimed_survivor_index = 0;
321  _scan_in_progress = _survivors->regions()->is_nonempty();
322  _should_abort = false;
323}
324
325HeapRegion* G1CMRootRegions::claim_next() {
326  if (_should_abort) {
327    // If someone has set the should_abort flag, we return NULL to
328    // force the caller to bail out of their loop.
329    return NULL;
330  }
331
332  // Currently, only survivors can be root regions.
333  const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions();
334
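  // Atomically claim the next survivor index so that each worker scans a
  // distinct root region; indices past the end of the list return NULL.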
335  int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1;
336  if (claimed_index < survivor_regions->length()) {
337    return survivor_regions->at(claimed_index);
338  }
339  return NULL;
340}
341
342uint G1CMRootRegions::num_root_regions() const {
343  return (uint)_survivors->regions()->length();
344}
345
346void G1CMRootRegions::notify_scan_done() {
347  MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
348  _scan_in_progress = false;
349  RootRegionScan_lock->notify_all();
350}
351
352void G1CMRootRegions::cancel_scan() {
353  notify_scan_done();
354}
355
356void G1CMRootRegions::scan_finished() {
357  assert(scan_in_progress(), "pre-condition");
358
359  // Currently, only survivors can be root regions.
360  if (!_should_abort) {
361    assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index);
362    assert((uint)_claimed_survivor_index >= _survivors->length(),
363           "we should have claimed all survivors, claimed index = %u, length = %u",
364           (uint)_claimed_survivor_index, _survivors->length());
365  }
366
367  notify_scan_done();
368}
369
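// Returns false immediately if no root region scan is in progress; otherwise
// blocks on RootRegionScan_lock until the concurrent scan has finished (or
// has been cancelled) and then returns true.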
370bool G1CMRootRegions::wait_until_scan_finished() {
371  if (!scan_in_progress()) return false;
372
373  {
374    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
375    while (scan_in_progress()) {
376      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
377    }
378  }
379  return true;
380}
381
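// Roughly one marking thread for every four parallel GC threads, rounded to
// the nearest integer, but never fewer than one (e.g. ParallelGCThreads = 8
// gives (8 + 2) / 4 = 2 marking threads).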
382uint G1ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
383  return MAX2((n_par_threads + 2) / 4, 1U);
384}
385
386G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
387  _g1h(g1h),
388  _markBitMap1(),
389  _markBitMap2(),
390  _parallel_marking_threads(0),
391  _max_parallel_marking_threads(0),
392  _sleep_factor(0.0),
393  _marking_task_overhead(1.0),
394  _cleanup_list("Cleanup List"),
395
396  _prevMarkBitMap(&_markBitMap1),
397  _nextMarkBitMap(&_markBitMap2),
398
399  _global_mark_stack(),
400  // _finger set in set_non_marking_state
401
402  _max_worker_id(ParallelGCThreads),
403  // _active_tasks set in set_non_marking_state
404  // _tasks set inside the constructor
405  _task_queues(new G1CMTaskQueueSet((int) _max_worker_id)),
406  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
407
408  _has_overflown(false),
409  _concurrent(false),
410  _has_aborted(false),
411  _restart_for_overflow(false),
412  _concurrent_marking_in_progress(false),
413  _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()),
414  _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()),
415
416  // _verbose_level set below
417
418  _init_times(),
419  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
420  _cleanup_times(),
421  _total_counting_time(0.0),
422  _total_rs_scrub_time(0.0),
423
424  _parallel_workers(NULL),
425
426  _completed_initialization(false) {
427
428  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
429  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
430
431  // Create & start a ConcurrentMark thread.
432  _cmThread = new ConcurrentMarkThread(this);
433  assert(cmThread() != NULL, "CM Thread should have been created");
434  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
435  if (_cmThread->osthread() == NULL) {
436      vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
437  }
438
439  assert(CGC_lock != NULL, "Where's the CGC_lock?");
440  assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
441  assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
442
443  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
444  satb_qs.set_buffer_size(G1SATBBufferSize);
445
446  _root_regions.init(_g1h->survivor(), this);
447
448  if (ConcGCThreads > ParallelGCThreads) {
449    log_warning(gc)("Can't have more ConcGCThreads (%u) than ParallelGCThreads (%u).",
450                    ConcGCThreads, ParallelGCThreads);
451    return;
452  }
453  if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
454    // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
455    // if both are set
456    _sleep_factor             = 0.0;
457    _marking_task_overhead    = 1.0;
458  } else if (G1MarkingOverheadPercent > 0) {
459    // We will calculate the number of parallel marking threads based
460    // on a target overhead with respect to the soft real-time goal
461    double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
462    double overall_cm_overhead =
463      (double) MaxGCPauseMillis * marking_overhead /
464      (double) GCPauseIntervalMillis;
465    double cpu_ratio = 1.0 / os::initial_active_processor_count();
466    double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
467    double marking_task_overhead =
468      overall_cm_overhead / marking_thread_num * os::initial_active_processor_count();
469    double sleep_factor =
470                       (1.0 - marking_task_overhead) / marking_task_overhead;
471
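    // With this sleep factor a marking thread that just worked for W seconds
    // sleeps for W * sleep_factor seconds, giving it a duty cycle of
    // W / (W + W * sleep_factor) == marking_task_overhead.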
472    FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
473    _sleep_factor             = sleep_factor;
474    _marking_task_overhead    = marking_task_overhead;
475  } else {
476    // Calculate the number of parallel marking threads by scaling
477    // the number of parallel GC threads.
478    uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
479    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
480    _sleep_factor             = 0.0;
481    _marking_task_overhead    = 1.0;
482  }
483
484  assert(ConcGCThreads > 0, "Should have been set");
485  log_debug(gc)("ConcGCThreads: %u", ConcGCThreads);
486  log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
487  _parallel_marking_threads = ConcGCThreads;
488  _max_parallel_marking_threads = _parallel_marking_threads;
489
490  _parallel_workers = new WorkGang("G1 Marker",
491       _max_parallel_marking_threads, false, true);
492  if (_parallel_workers == NULL) {
493    vm_exit_during_initialization("Failed necessary allocation.");
494  } else {
495    _parallel_workers->initialize_workers();
496  }
497
498  if (FLAG_IS_DEFAULT(MarkStackSize)) {
499    size_t mark_stack_size =
500      MIN2(MarkStackSizeMax,
501          MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
502    // Verify that the calculated value for MarkStackSize is in range.
503    // It would be nice to use the private utility routine from Arguments.
504    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
505      log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
506                      "must be between 1 and " SIZE_FORMAT,
507                      mark_stack_size, MarkStackSizeMax);
508      return;
509    }
510    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
511  } else {
512    // Verify MarkStackSize is in range.
513    if (FLAG_IS_CMDLINE(MarkStackSize)) {
514      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
515        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
516          log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
517                          "must be between 1 and " SIZE_FORMAT,
518                          MarkStackSize, MarkStackSizeMax);
519          return;
520        }
521      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
522        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
523          log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
524                          " or for MarkStackSizeMax (" SIZE_FORMAT ")",
525                          MarkStackSize, MarkStackSizeMax);
526          return;
527        }
528      }
529    }
530  }
531
532  if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) {
533    vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
534  }
535
536  _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC);
537  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
538
539  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
540  _active_tasks = _max_worker_id;
541
542  for (uint i = 0; i < _max_worker_id; ++i) {
543    G1CMTaskQueue* task_queue = new G1CMTaskQueue();
544    task_queue->initialize();
545    _task_queues->register_queue(i, task_queue);
546
547    _tasks[i] = new G1CMTask(i, this, task_queue, _task_queues);
548
549    _accum_task_vtime[i] = 0.0;
550  }
551
552  // so that the call below can read a sensible value
553  _heap_start = g1h->reserved_region().start();
554  set_non_marking_state();
555  _completed_initialization = true;
556}
557
558void G1ConcurrentMark::reset() {
559  // Starting values for these two. This should be called in a STW
560  // phase.
561  MemRegion reserved = _g1h->g1_reserved();
562  _heap_start = reserved.start();
563  _heap_end   = reserved.end();
564
565  // Separated the asserts so that we know which one fires.
566  assert(_heap_start != NULL, "heap bounds should look ok");
567  assert(_heap_end != NULL, "heap bounds should look ok");
568  assert(_heap_start < _heap_end, "heap bounds should look ok");
569
570  // Reset all the marking data structures and any necessary flags
571  reset_marking_state();
572
573  // We do reset all of them, since different phases will use
574  // different numbers of active threads. So, it's easiest to have all
575  // of them ready.
576  for (uint i = 0; i < _max_worker_id; ++i) {
577    _tasks[i]->reset(_nextMarkBitMap);
578  }
579
580  // we need this to make sure that the flag is on during the evac
581  // pause that has the initial mark piggy-backed on it
582  set_concurrent_marking_in_progress();
583}
584
585
586void G1ConcurrentMark::reset_marking_state() {
587  _global_mark_stack.set_empty();
588
589  // Expand the marking stack, if we have to and if we can.
590  if (has_overflown()) {
591    _global_mark_stack.expand();
592  }
593
594  clear_has_overflown();
595  _finger = _heap_start;
596
597  for (uint i = 0; i < _max_worker_id; ++i) {
598    G1CMTaskQueue* queue = _task_queues->queue(i);
599    queue->set_empty();
600  }
601}
602
603void G1ConcurrentMark::set_concurrency(uint active_tasks) {
604  assert(active_tasks <= _max_worker_id, "we should not have more");
605
606  _active_tasks = active_tasks;
607  // Need to update the three data structures below according to the
608  // number of active threads for this phase.
609  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
610  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
611  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
612}
613
614void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
615  set_concurrency(active_tasks);
616
617  _concurrent = concurrent;
618  // We propagate this to all tasks, not just the active ones.
619  for (uint i = 0; i < _max_worker_id; ++i)
620    _tasks[i]->set_concurrent(concurrent);
621
622  if (concurrent) {
623    set_concurrent_marking_in_progress();
624  } else {
625    // We currently assume that the concurrent flag has been set to
626    // false before we start remark. At this point we should also be
627    // in a STW phase.
628    assert(!concurrent_marking_in_progress(), "invariant");
629    assert(out_of_regions(),
630           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
631           p2i(_finger), p2i(_heap_end));
632  }
633}
634
635void G1ConcurrentMark::set_non_marking_state() {
636  // We set the global marking state to some default values when we're
637  // not doing marking.
638  reset_marking_state();
639  _active_tasks = 0;
640  clear_concurrent_marking_in_progress();
641}
642
643G1ConcurrentMark::~G1ConcurrentMark() {
644  // The G1ConcurrentMark instance is never freed.
645  ShouldNotReachHere();
646}
647
648class G1ClearBitMapTask : public AbstractGangTask {
649public:
650  static size_t chunk_size() { return M; }
651
652private:
653  // Heap region closure used for clearing the given mark bitmap.
654  class G1ClearBitmapHRClosure : public HeapRegionClosure {
655  private:
656    G1CMBitMap* _bitmap;
657    G1ConcurrentMark* _cm;
658  public:
659    G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap) {
660    }
661
662    virtual bool doHeapRegion(HeapRegion* r) {
663      size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;
664
665      HeapWord* cur = r->bottom();
666      HeapWord* const end = r->end();
667
668      while (cur < end) {
669        MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
670        _bitmap->clear_range(mr);
671
672        cur += chunk_size_in_words;
673
674        // Abort the iteration if, after yielding, the marking has been aborted.
675        if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) {
676          return true;
677        }
678        // Repeat the asserts from before the start of the closure. We will do them
679        // as asserts here to minimize their overhead on the product. However, we
680        // will have them as guarantees at the beginning / end of the bitmap
681        // clearing to get some checking in the product.
682        assert(_cm == NULL || _cm->cmThread()->during_cycle(), "invariant");
683        assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
684      }
685      assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());
686
687      return false;
688    }
689  };
690
691  G1ClearBitmapHRClosure _cl;
692  HeapRegionClaimer _hr_claimer;
693  bool _suspendible; // If the task is suspendible, workers must join the STS.
694
695public:
696  G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
697    AbstractGangTask("G1 Clear Bitmap"),
698    _cl(bitmap, suspendible ? cm : NULL),
699    _hr_claimer(n_workers),
700    _suspendible(suspendible)
701  { }
702
703  void work(uint worker_id) {
704    SuspendibleThreadSetJoiner sts_join(_suspendible);
705    G1CollectedHeap::heap()->heap_region_par_iterate(&_cl, worker_id, &_hr_claimer);
706  }
707
708  bool is_complete() {
709    return _cl.complete();
710  }
711};
712
713void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) {
714  assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
715
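  // The amount of bitmap backing to clear is the heap size scaled down by the
  // bitmap's heap_map_factor; the work is split into chunk_size() units and
  // the number of workers is capped by the number of such work units.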
716  size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor();
717  size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
718
719  uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
720
721  G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield);
722
723  log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks);
724  workers->run_task(&cl, num_workers);
725  guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
726}
727
728void G1ConcurrentMark::cleanup_for_next_mark() {
729  // Make sure that the concurrent mark thread still appears to be
730  // in the current cycle.
731  guarantee(cmThread()->during_cycle(), "invariant");
732
733  // We are finishing up the current cycle by clearing the next
734  // marking bitmap and getting it ready for the next cycle. During
735  // this time no other cycle can start. So, let's make sure that this
736  // is the case.
737  guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");
738
739  clear_bitmap(_nextMarkBitMap, _parallel_workers, true);
740
741  // Clear the live count data. If the marking has been aborted, the abort()
742  // call already did that.
743  if (!has_aborted()) {
744    clear_live_data(_parallel_workers);
745    DEBUG_ONLY(verify_live_data_clear());
746  }
747
748  // Repeat the asserts from above.
749  guarantee(cmThread()->during_cycle(), "invariant");
750  guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");
751}
752
753void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
754  assert(SafepointSynchronize::is_at_safepoint(), "Should only clear the entire prev bitmap at a safepoint.");
755  clear_bitmap((G1CMBitMap*)_prevMarkBitMap, workers, false);
756}
757
758class CheckBitmapClearHRClosure : public HeapRegionClosure {
759  G1CMBitMap* _bitmap;
760  bool _error;
761 public:
762  CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
763  }
764
765  virtual bool doHeapRegion(HeapRegion* r) {
766    // This closure can be called concurrently with the mutator, so we must make sure
767    // that the result of the getNextMarkedWordAddress() call is compared to the
768    // value passed to it as limit to detect any found bits.
769    // end never changes in G1.
770    HeapWord* end = r->end();
771    return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
772  }
773};
774
775bool G1ConcurrentMark::nextMarkBitmapIsClear() {
776  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
777  _g1h->heap_region_iterate(&cl);
778  return cl.complete();
779}
780
781class NoteStartOfMarkHRClosure: public HeapRegionClosure {
782public:
783  bool doHeapRegion(HeapRegion* r) {
784    r->note_start_of_marking();
785    return false;
786  }
787};
788
789void G1ConcurrentMark::checkpointRootsInitialPre() {
790  G1CollectedHeap* g1h = G1CollectedHeap::heap();
791  G1Policy* g1p = g1h->g1_policy();
792
793  _has_aborted = false;
794
795  // Initialize marking structures. This has to be done in a STW phase.
796  reset();
797
798  // For each region note start of marking.
799  NoteStartOfMarkHRClosure startcl;
800  g1h->heap_region_iterate(&startcl);
801}
802
803
804void G1ConcurrentMark::checkpointRootsInitialPost() {
805  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
806
807  // Start Concurrent Marking weak-reference discovery.
808  ReferenceProcessor* rp = g1h->ref_processor_cm();
809  // enable ("weak") refs discovery
810  rp->enable_discovery();
811  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
812
813  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
814  // This is the start of the marking cycle; we expect all
815  // threads to have SATB queues with active set to false.
816  satb_mq_set.set_active_all_threads(true, /* new active value */
817                                     false /* expected_active */);
818
819  _root_regions.prepare_for_scan();
820
821  // update_g1_committed() will be called at the end of an evac pause
822  // when marking is on. So, it's also called at the end of the
823  // initial-mark pause to update the heap end, if the heap expands
824  // during it. No need to call it here.
825}
826
827/*
828 * Notice that in the next two methods, we actually leave the STS
829 * during the barrier sync and join it immediately afterwards. If we
830 * do not do this, the following deadlock can occur: one thread could
831 * be in the barrier sync code, waiting for the other thread to also
832 * sync up, whereas another one could be trying to yield, while also
833 * waiting for the other threads to sync up too.
834 *
835 * Note, however, that this code is also used during remark and in
836 * this case we should not attempt to leave / enter the STS, otherwise
837 * we'll either hit an assert (debug / fastdebug) or deadlock
838 * (product). So we should only leave / enter the STS if we are
839 * operating concurrently.
840 *
841 * Because the thread that does the sync barrier has left the STS, it
842 * may be suspended by a Full GC or an evacuation pause. This is
843 * actually safe, since entering the sync barrier is one of the last
844 * things do_marking_step() does, and it doesn't manipulate any data
845 * structures afterwards.
846 */
847
848void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
849  bool barrier_aborted;
850  {
851    SuspendibleThreadSetLeaver sts_leave(concurrent());
852    barrier_aborted = !_first_overflow_barrier_sync.enter();
853  }
854
855  // at this point everyone should have synced up and not be doing any
856  // more work
857
858  if (barrier_aborted) {
859    // If the barrier aborted we ignore the overflow condition and
860    // just abort the whole marking phase as quickly as possible.
861    return;
862  }
863
864  // If we're executing the concurrent phase of marking, reset the marking
865  // state; otherwise the marking state is reset after reference processing,
866  // during the remark pause.
867  // If we reset here as a result of an overflow during the remark we will
868  // see assertion failures from any subsequent set_concurrency_and_phase()
869  // calls.
870  if (concurrent()) {
871    // let the task associated with worker 0 do this
872    if (worker_id == 0) {
873      // task 0 is responsible for clearing the global data structures
874      // We should be here because of an overflow. During STW we should
875      // not clear the overflow flag since we rely on it being true when
876      // we exit this method to abort the pause and restart concurrent
877      // marking.
878      reset_marking_state();
879
880      log_info(gc, marking)("Concurrent Mark reset for overflow");
881    }
882  }
883
884  // after this, each task should reset its own data structures and
885  // then go into the second barrier
886}
887
888void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
889  SuspendibleThreadSetLeaver sts_leave(concurrent());
890  _second_overflow_barrier_sync.enter();
891
892  // at this point everything should be re-initialized and ready to go
893}
894
895class G1CMConcurrentMarkingTask: public AbstractGangTask {
896private:
897  G1ConcurrentMark*     _cm;
898  ConcurrentMarkThread* _cmt;
899
900public:
901  void work(uint worker_id) {
902    assert(Thread::current()->is_ConcurrentGC_thread(),
903           "this should only be done by a conc GC thread");
904    ResourceMark rm;
905
906    double start_vtime = os::elapsedVTime();
907
908    {
909      SuspendibleThreadSetJoiner sts_join;
910
911      assert(worker_id < _cm->active_tasks(), "invariant");
912      G1CMTask* the_task = _cm->task(worker_id);
913      the_task->record_start_time();
914      if (!_cm->has_aborted()) {
915        do {
916          double start_vtime_sec = os::elapsedVTime();
917          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
918
919          the_task->do_marking_step(mark_step_duration_ms,
920                                    true  /* do_termination */,
921                                    false /* is_serial*/);
922
923          double end_vtime_sec = os::elapsedVTime();
924          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
925          _cm->do_yield_check();
926
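          // If this marking step aborted (e.g. it reached its time target)
          // while marking as a whole is still running, throttle the worker by
          // sleeping in proportion to the virtual time just spent, scaled by
          // the configured sleep factor.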
927          jlong sleep_time_ms;
928          if (!_cm->has_aborted() && the_task->has_aborted()) {
929            sleep_time_ms =
930              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
931            {
932              SuspendibleThreadSetLeaver sts_leave;
933              os::sleep(Thread::current(), sleep_time_ms, false);
934            }
935          }
936        } while (!_cm->has_aborted() && the_task->has_aborted());
937      }
938      the_task->record_end_time();
939      guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
940    }
941
942    double end_vtime = os::elapsedVTime();
943    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
944  }
945
946  G1CMConcurrentMarkingTask(G1ConcurrentMark* cm,
947                            ConcurrentMarkThread* cmt) :
948      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
949
950  ~G1CMConcurrentMarkingTask() { }
951};
952
953// Calculates the number of active workers for a concurrent
954// phase.
955uint G1ConcurrentMark::calc_parallel_marking_threads() {
956  uint n_conc_workers = 0;
957  if (!UseDynamicNumberOfGCThreads ||
958      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
959       !ForceDynamicNumberOfGCThreads)) {
960    n_conc_workers = max_parallel_marking_threads();
961  } else {
962    n_conc_workers =
963      AdaptiveSizePolicy::calc_default_active_workers(max_parallel_marking_threads(),
964                                                      1, /* Minimum workers */
965                                                      parallel_marking_threads(),
966                                                      Threads::number_of_non_daemon_threads());
967    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
968    // that scaling has already gone into "_max_parallel_marking_threads".
969  }
970  assert(n_conc_workers > 0 && n_conc_workers <= max_parallel_marking_threads(),
971         "Calculated number of workers must be larger than zero and at most the maximum %u, but is %u",
972         max_parallel_marking_threads(), n_conc_workers);
973  return n_conc_workers;
974}
975
976void G1ConcurrentMark::scanRootRegion(HeapRegion* hr) {
977  // Currently, only survivors can be root regions.
978  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
979  G1RootRegionScanClosure cl(_g1h, this);
980
981  const uintx interval = PrefetchScanIntervalInBytes;
982  HeapWord* curr = hr->bottom();
983  const HeapWord* end = hr->top();
984  while (curr < end) {
985    Prefetch::read(curr, interval);
986    oop obj = oop(curr);
987    int size = obj->oop_iterate_size(&cl);
988    assert(size == obj->size(), "sanity");
989    curr += size;
990  }
991}
992
993class G1CMRootRegionScanTask : public AbstractGangTask {
994private:
995  G1ConcurrentMark* _cm;
996
997public:
998  G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
999    AbstractGangTask("G1 Root Region Scan"), _cm(cm) { }
1000
1001  void work(uint worker_id) {
1002    assert(Thread::current()->is_ConcurrentGC_thread(),
1003           "this should only be done by a conc GC thread");
1004
1005    G1CMRootRegions* root_regions = _cm->root_regions();
1006    HeapRegion* hr = root_regions->claim_next();
1007    while (hr != NULL) {
1008      _cm->scanRootRegion(hr);
1009      hr = root_regions->claim_next();
1010    }
1011  }
1012};
1013
1014void G1ConcurrentMark::scan_root_regions() {
1015  // scan_in_progress() will have been set to true only if there was
1016  // at least one root region to scan. So, if it's false, we
1017  // should not attempt to do any further work.
1018  if (root_regions()->scan_in_progress()) {
1019    assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");
1020
1021    _parallel_marking_threads = MIN2(calc_parallel_marking_threads(),
1022                                     // We distribute work on a per-region basis, so starting
1023                                     // more threads than that is useless.
1024                                     root_regions()->num_root_regions());
1025    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1026           "Maximum number of marking threads exceeded");
1027
1028    G1CMRootRegionScanTask task(this);
1029    log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
1030                        task.name(), _parallel_marking_threads, root_regions()->num_root_regions());
1031    _parallel_workers->run_task(&task, _parallel_marking_threads);
1032
1033    // It's possible that has_aborted() is true here without actually
1034    // aborting the survivor scan earlier. This is OK as it's
1035    // mainly used for sanity checking.
1036    root_regions()->scan_finished();
1037  }
1038}
1039
1040void G1ConcurrentMark::concurrent_cycle_start() {
1041  _gc_timer_cm->register_gc_start();
1042
1043  _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());
1044
1045  _g1h->trace_heap_before_gc(_gc_tracer_cm);
1046}
1047
1048void G1ConcurrentMark::concurrent_cycle_end() {
1049  _g1h->trace_heap_after_gc(_gc_tracer_cm);
1050
1051  if (has_aborted()) {
1052    _gc_tracer_cm->report_concurrent_mode_failure();
1053  }
1054
1055  _gc_timer_cm->register_gc_end();
1056
1057  _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
1058}
1059
1060void G1ConcurrentMark::mark_from_roots() {
1061  // we might be tempted to assert that:
1062  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1063  //        "inconsistent argument?");
1064  // However that wouldn't be right, because it's possible that
1065  // a safepoint is indeed in progress as a younger generation
1066  // stop-the-world GC happens even as we mark in this generation.
1067
1068  _restart_for_overflow = false;
1069
1070  // _g1h has _n_par_threads
1071  _parallel_marking_threads = calc_parallel_marking_threads();
1072  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1073    "Maximum number of marking threads exceeded");
1074
1075  uint active_workers = MAX2(1U, parallel_marking_threads());
1076  assert(active_workers > 0, "Should have been set");
1077
1078  // Setting active workers is not guaranteed since fewer
1079  // worker threads may currently exist and more may not be
1080  // available.
1081  active_workers = _parallel_workers->update_active_workers(active_workers);
1082  log_info(gc, task)("Using %u workers of %u for marking", active_workers, _parallel_workers->total_workers());
1083
1084  // Parallel task terminator is set in "set_concurrency_and_phase()"
1085  set_concurrency_and_phase(active_workers, true /* concurrent */);
1086
1087  G1CMConcurrentMarkingTask markingTask(this, cmThread());
1088  _parallel_workers->run_task(&markingTask);
1089  print_stats();
1090}
1091
1092void G1ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1093  // world is stopped at this checkpoint
1094  assert(SafepointSynchronize::is_at_safepoint(),
1095         "world should be stopped");
1096
1097  G1CollectedHeap* g1h = G1CollectedHeap::heap();
1098
1099  // If a full collection has happened, we shouldn't do this.
1100  if (has_aborted()) {
1101    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1102    return;
1103  }
1104
1105  SvcGCMarker sgcm(SvcGCMarker::OTHER);
1106
1107  if (VerifyDuringGC) {
1108    HandleMark hm;  // handle scope
1109    g1h->prepare_for_verify();
1110    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1111  }
1112  g1h->verifier()->check_bitmaps("Remark Start");
1113
1114  G1Policy* g1p = g1h->g1_policy();
1115  g1p->record_concurrent_mark_remark_start();
1116
1117  double start = os::elapsedTime();
1118
1119  checkpointRootsFinalWork();
1120
1121  double mark_work_end = os::elapsedTime();
1122
1123  weakRefsWork(clear_all_soft_refs);
1124
1125  if (has_overflown()) {
1126    // We overflowed.  Restart concurrent marking.
1127    _restart_for_overflow = true;
1128
1129    // Verify the heap w.r.t. the previous marking bitmap.
1130    if (VerifyDuringGC) {
1131      HandleMark hm;  // handle scope
1132      g1h->prepare_for_verify();
1133      Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
1134    }
1135
1136    // Clear the marking state because we will be restarting
1137    // marking due to overflowing the global mark stack.
1138    reset_marking_state();
1139  } else {
1140    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1141    // We're done with marking.
1142    // This is the end of the marking cycle; we expect all
1143    // threads to have SATB queues with active set to true.
1144    satb_mq_set.set_active_all_threads(false, /* new active value */
1145                                       true /* expected_active */);
1146
1147    if (VerifyDuringGC) {
1148      HandleMark hm;  // handle scope
1149      g1h->prepare_for_verify();
1150      Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
1151    }
1152    g1h->verifier()->check_bitmaps("Remark End");
1153    assert(!restart_for_overflow(), "sanity");
1154    // Completely reset the marking state since marking completed
1155    set_non_marking_state();
1156  }
1157
1158  // Statistics
1159  double now = os::elapsedTime();
1160  _remark_mark_times.add((mark_work_end - start) * 1000.0);
1161  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1162  _remark_times.add((now - start) * 1000.0);
1163
1164  g1p->record_concurrent_mark_remark_end();
1165
1166  G1CMIsAliveClosure is_alive(g1h);
1167  _gc_tracer_cm->report_object_count_after_gc(&is_alive);
1168}
1169
1170class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1171  G1CollectedHeap* _g1;
1172  size_t _freed_bytes;
1173  FreeRegionList* _local_cleanup_list;
1174  uint _old_regions_removed;
1175  uint _humongous_regions_removed;
1176  HRRSCleanupTask* _hrrs_cleanup_task;
1177
1178public:
1179  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1180                             FreeRegionList* local_cleanup_list,
1181                             HRRSCleanupTask* hrrs_cleanup_task) :
1182    _g1(g1),
1183    _freed_bytes(0),
1184    _local_cleanup_list(local_cleanup_list),
1185    _old_regions_removed(0),
1186    _humongous_regions_removed(0),
1187    _hrrs_cleanup_task(hrrs_cleanup_task) { }
1188
1189  size_t freed_bytes() { return _freed_bytes; }
1190  const uint old_regions_removed() { return _old_regions_removed; }
1191  const uint humongous_regions_removed() { return _humongous_regions_removed; }
1192
1193  bool doHeapRegion(HeapRegion *hr) {
1194    if (hr->is_archive()) {
1195      return false;
1196    }
1197    _g1->reset_gc_time_stamps(hr);
1198    hr->note_end_of_marking();
1199
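    // Regions that marking found completely dead (used, but with no live
    // bytes, and not young) are freed right here; all other regions just get
    // their remembered sets cleaned up.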
1200    if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1201      _freed_bytes += hr->used();
1202      hr->set_containing_set(NULL);
1203      if (hr->is_humongous()) {
1204        _humongous_regions_removed++;
1205        _g1->free_humongous_region(hr, _local_cleanup_list, true /* skip_remset */);
1206      } else {
1207        _old_regions_removed++;
1208        _g1->free_region(hr, _local_cleanup_list, true /* skip_remset */);
1209      }
1210    } else {
1211      hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1212    }
1213
1214    return false;
1215  }
1216};
1217
1218class G1ParNoteEndTask: public AbstractGangTask {
1219  friend class G1NoteEndOfConcMarkClosure;
1220
1221protected:
1222  G1CollectedHeap* _g1h;
1223  FreeRegionList* _cleanup_list;
1224  HeapRegionClaimer _hrclaimer;
1225
1226public:
1227  G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1228      AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1229  }
1230
1231  void work(uint worker_id) {
1232    FreeRegionList local_cleanup_list("Local Cleanup List");
1233    HRRSCleanupTask hrrs_cleanup_task;
1234    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1235                                           &hrrs_cleanup_task);
1236    _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1237    assert(g1_note_end.complete(), "Shouldn't have yielded!");
1238
1239    // Now update the lists
1240    _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1241    {
1242      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1243      _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1244
1245      // If we iterate over the global cleanup list at the end of
1246      // cleanup to do this printing we will not guarantee to only
1247      // generate output for the newly-reclaimed regions (the list
1248      // might not be empty at the beginning of cleanup; we might
1249      // still be working on its previous contents). So we do the
1250      // printing here, before we append the new regions to the global
1251      // cleanup list.
1252
1253      G1HRPrinter* hr_printer = _g1h->hr_printer();
1254      if (hr_printer->is_active()) {
1255        FreeRegionListIterator iter(&local_cleanup_list);
1256        while (iter.more_available()) {
1257          HeapRegion* hr = iter.get_next();
1258          hr_printer->cleanup(hr);
1259        }
1260      }
1261
1262      _cleanup_list->add_ordered(&local_cleanup_list);
1263      assert(local_cleanup_list.is_empty(), "post-condition");
1264
1265      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1266    }
1267  }
1268};
1269
1270void G1ConcurrentMark::cleanup() {
1271  // world is stopped at this checkpoint
1272  assert(SafepointSynchronize::is_at_safepoint(),
1273         "world should be stopped");
1274  G1CollectedHeap* g1h = G1CollectedHeap::heap();
1275
1276  // If a full collection has happened, we shouldn't do this.
1277  if (has_aborted()) {
1278    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1279    return;
1280  }
1281
1282  g1h->verifier()->verify_region_sets_optional();
1283
1284  if (VerifyDuringGC) {
1285    HandleMark hm;  // handle scope
1286    g1h->prepare_for_verify();
1287    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1288  }
1289  g1h->verifier()->check_bitmaps("Cleanup Start");
1290
1291  G1Policy* g1p = g1h->g1_policy();
1292  g1p->record_concurrent_mark_cleanup_start();
1293
1294  double start = os::elapsedTime();
1295
1296  HeapRegionRemSet::reset_for_cleanup_tasks();
1297
1298  {
1299    GCTraceTime(Debug, gc)("Finalize Live Data");
1300    finalize_live_data();
1301  }
1302
1303  if (VerifyDuringGC) {
1304    GCTraceTime(Debug, gc)("Verify Live Data");
1305    verify_live_data();
1306  }
1307
1308  g1h->collector_state()->set_mark_in_progress(false);
1309
1310  double count_end = os::elapsedTime();
1311  double this_final_counting_time = (count_end - start);
1312  _total_counting_time += this_final_counting_time;
1313
1314  if (log_is_enabled(Trace, gc, liveness)) {
1315    G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1316    _g1h->heap_region_iterate(&cl);
1317  }
1318
1319  // Install newly created mark bitMap as "prev".
1320  swapMarkBitMaps();
1321
1322  g1h->reset_gc_time_stamp();
1323
1324  uint n_workers = _g1h->workers()->active_workers();
1325
1326  // Note end of marking in all heap regions.
1327  G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1328  g1h->workers()->run_task(&g1_par_note_end_task);
1329  g1h->check_gc_time_stamps();
1330
1331  if (!cleanup_list_is_empty()) {
1332    // The cleanup list is not empty, so we'll have to process it
1333    // concurrently. Notify anyone else that might be wanting free
1334    // regions that there will be more free regions coming soon.
1335    g1h->set_free_regions_coming();
1336  }
1337
1338  // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
1339  // call below, since it affects the metric by which we sort the heap regions.
1340  if (G1ScrubRemSets) {
1341    double rs_scrub_start = os::elapsedTime();
1342    g1h->scrub_rem_set();
1343    _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start);
1344  }
1345
1346  // this will also free any regions totally full of garbage objects,
1347  // and sort the regions.
1348  g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1349
1350  // Statistics.
1351  double end = os::elapsedTime();
1352  _cleanup_times.add((end - start) * 1000.0);
1353
1354  // Clean up will have freed any regions completely full of garbage.
1355  // Update the soft reference policy with the new heap occupancy.
1356  Universe::update_heap_info_at_gc();
1357
1358  if (VerifyDuringGC) {
1359    HandleMark hm;  // handle scope
1360    g1h->prepare_for_verify();
1361    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
1362  }
1363
1364  g1h->verifier()->check_bitmaps("Cleanup End");
1365
1366  g1h->verifier()->verify_region_sets_optional();
1367
1368  // We need to make this be a "collection" so any collection pause that
1369  // races with it goes around and waits for completeCleanup to finish.
1370  g1h->increment_total_collections();
1371
1372  // Clean out dead classes and update Metaspace sizes.
1373  if (ClassUnloadingWithConcurrentMark) {
1374    ClassLoaderDataGraph::purge();
1375  }
1376  MetaspaceGC::compute_new_size();
1377
1378  // We reclaimed old regions so we should calculate the sizes to make
1379  // sure we update the old gen/space data.
1380  g1h->g1mm()->update_sizes();
1381  g1h->allocation_context_stats().update_after_mark();
1382}
1383
1384void G1ConcurrentMark::complete_cleanup() {
1385  if (has_aborted()) return;
1386
1387  G1CollectedHeap* g1h = G1CollectedHeap::heap();
1388
1389  _cleanup_list.verify_optional();
1390  FreeRegionList tmp_free_list("Tmp Free List");
1391
1392  log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1393                                  "cleanup list has %u entries",
1394                                  _cleanup_list.length());
1395
1396  // No one else should be accessing the _cleanup_list at this point,
1397  // so it is not necessary to take any locks
1398  while (!_cleanup_list.is_empty()) {
1399    HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1400    assert(hr != NULL, "Got NULL from a non-empty list");
1401    hr->par_clear();
1402    tmp_free_list.add_ordered(hr);
1403
1404    // Instead of adding one region at a time to the secondary_free_list,
1405    // we accumulate them in the local list and move them a few at a
1406    // time. This also cuts down on the number of notify_all() calls
1407    // we do during this process. We'll also append the local list when
1408    // _cleanup_list is empty (which means we just removed the last
1409    // region from the _cleanup_list).
1410    if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1411        _cleanup_list.is_empty()) {
1412      log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1413                                      "appending %u entries to the secondary_free_list, "
1414                                      "cleanup list still has %u entries",
1415                                      tmp_free_list.length(),
1416                                      _cleanup_list.length());
1417
1418      {
1419        MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1420        g1h->secondary_free_list_add(&tmp_free_list);
1421        SecondaryFreeList_lock->notify_all();
1422      }
1423#ifndef PRODUCT
1424      if (G1StressConcRegionFreeing) {
1425        for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1426          os::sleep(Thread::current(), (jlong) 1, false);
1427        }
1428      }
1429#endif
1430    }
1431  }
1432  assert(tmp_free_list.is_empty(), "post-condition");
1433}
1434
1435// Supporting Object and Oop closures for reference discovery
1436// and processing during marking
1437
1438bool G1CMIsAliveClosure::do_object_b(oop obj) {
1439  HeapWord* addr = (HeapWord*)obj;
1440  return addr != NULL &&
1441         (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1442}
1443
1444// 'Keep Alive' oop closure used by both serial and parallel reference processing.
1445// Uses the G1CMTask associated with a worker thread (for serial reference
1446// processing the G1CMTask for worker 0 is used) to preserve (mark) and
1447// trace referent objects.
1448//
1449// Using the G1CMTask and embedded local queues avoids having the worker
1450// threads operating on the global mark stack. This reduces the risk
1451// of overflowing the stack - which we would rather avoid at this late
1452// state. Also using the tasks' local queues removes the potential
1453// of the workers interfering with each other that could occur if
1454// operating on the global stack.
1455
1456class G1CMKeepAliveAndDrainClosure: public OopClosure {
1457  G1ConcurrentMark* _cm;
1458  G1CMTask*         _task;
1459  int               _ref_counter_limit;
1460  int               _ref_counter;
1461  bool              _is_serial;
1462 public:
1463  G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1464    _cm(cm), _task(task), _is_serial(is_serial),
1465    _ref_counter_limit(G1RefProcDrainInterval) {
1466    assert(_ref_counter_limit > 0, "sanity");
1467    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1468    _ref_counter = _ref_counter_limit;
1469  }
1470
1471  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1472  virtual void do_oop(      oop* p) { do_oop_work(p); }
1473
1474  template <class T> void do_oop_work(T* p) {
1475    if (!_cm->has_overflown()) {
1476      oop obj = oopDesc::load_decode_heap_oop(p);
1477      _task->deal_with_reference(obj);
1478      _ref_counter--;
1479
1480      if (_ref_counter == 0) {
1481        // We have dealt with _ref_counter_limit references, pushing them
1482        // and objects reachable from them on to the local stack (and
1483        // possibly the global stack). Call G1CMTask::do_marking_step() to
1484        // process these entries.
1485        //
1486        // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1487        // there's nothing more to do (i.e. we're done with the entries that
1488        // were pushed as a result of the G1CMTask::deal_with_reference() calls
1489        // above) or we overflow.
1490        //
1491        // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1492        // flag while there may still be some work to do. (See the comment at
1493        // the beginning of G1CMTask::do_marking_step() for those conditions -
1494        // one of which is reaching the specified time target.) It is only
1495        // when G1CMTask::do_marking_step() returns without setting the
1496        // has_aborted() flag that the marking step has completed.
1497        do {
1498          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1499          _task->do_marking_step(mark_step_duration_ms,
1500                                 false      /* do_termination */,
1501                                 _is_serial);
1502        } while (_task->has_aborted() && !_cm->has_overflown());
1503        _ref_counter = _ref_counter_limit;
1504      }
1505    }
1506  }
1507};
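
// A consequence of the counter logic above (summarizing the code, not new
// behaviour): each referent handed to do_oop() is pushed via
// deal_with_reference(), and after every G1RefProcDrainInterval references the
// task runs bounded do_marking_step() passes (G1ConcMarkStepDurationMillis
// each, without termination) to drain what was pushed, so the local queue and
// global stack are emptied incrementally during reference processing.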
1508
1509// 'Drain' oop closure used by both serial and parallel reference processing.
1510// Uses the G1CMTask associated with a given worker thread (for serial
1511// reference processing the G1CMTask for worker 0 is used). Calls the
1512// do_marking_step routine, with an unbelievably large timeout value,
1513// to drain the marking data structures of the remaining entries
1514// added by the 'keep alive' oop closure above.
1515
1516class G1CMDrainMarkingStackClosure: public VoidClosure {
1517  G1ConcurrentMark* _cm;
1518  G1CMTask*         _task;
1519  bool              _is_serial;
1520 public:
1521  G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1522    _cm(cm), _task(task), _is_serial(is_serial) {
1523    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1524  }
1525
1526  void do_void() {
1527    do {
1528      // We call G1CMTask::do_marking_step() to completely drain the local
1529      // and global marking stacks of entries pushed by the 'keep alive'
1530      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1531      //
1532      // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1533      // if there's nothing more to do (i.e. we've completely drained the
1534      // entries that were pushed as a result of applying the 'keep alive'
1535      // closure to the entries on the discovered ref lists) or we overflow
1536      // the global marking stack.
1537      //
1538      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1539      // flag while there may still be some work to do. (See the comment at
1540      // the beginning of G1CMTask::do_marking_step() for those conditions -
1541      // one of which is reaching the specified time target.) It is only
1542      // when G1CMTask::do_marking_step() returns without setting the
1543      // has_aborted() flag that the marking step has completed.
1544
1545      _task->do_marking_step(1000000000.0 /* something very large */,
1546                             true         /* do_termination */,
1547                             _is_serial);
1548    } while (_task->has_aborted() && !_cm->has_overflown());
1549  }
1550};
1551
1552// Implementation of AbstractRefProcTaskExecutor for parallel
1553// reference processing at the end of G1 concurrent marking
1554
1555class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
1556private:
1557  G1CollectedHeap*  _g1h;
1558  G1ConcurrentMark* _cm;
1559  WorkGang*         _workers;
1560  uint              _active_workers;
1561
1562public:
1563  G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1564                          G1ConcurrentMark* cm,
1565                          WorkGang* workers,
1566                          uint n_workers) :
1567    _g1h(g1h), _cm(cm),
1568    _workers(workers), _active_workers(n_workers) { }
1569
1570  // Executes the given task using concurrent marking worker threads.
1571  virtual void execute(ProcessTask& task);
1572  virtual void execute(EnqueueTask& task);
1573};
1574
1575class G1CMRefProcTaskProxy: public AbstractGangTask {
1576  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
1577  ProcessTask&      _proc_task;
1578  G1CollectedHeap*  _g1h;
1579  G1ConcurrentMark* _cm;
1580
1581public:
1582  G1CMRefProcTaskProxy(ProcessTask& proc_task,
1583                       G1CollectedHeap* g1h,
1584                       G1ConcurrentMark* cm) :
1585    AbstractGangTask("Process reference objects in parallel"),
1586    _proc_task(proc_task), _g1h(g1h), _cm(cm) {
1587    ReferenceProcessor* rp = _g1h->ref_processor_cm();
1588    assert(rp->processing_is_mt(), "shouldn't be here otherwise");
1589  }
1590
1591  virtual void work(uint worker_id) {
1592    ResourceMark rm;
1593    HandleMark hm;
1594    G1CMTask* task = _cm->task(worker_id);
1595    G1CMIsAliveClosure g1_is_alive(_g1h);
1596    G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
1597    G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
1598
1599    _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
1600  }
1601};
1602
1603void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
1604  assert(_workers != NULL, "Need parallel worker threads.");
1605  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1606
1607  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
1608
1609  // We need to reset the concurrency level before each
1610  // proxy task execution, so that the termination protocol
1611  // and overflow handling in G1CMTask::do_marking_step() know
1612  // how many workers to wait for.
1613  _cm->set_concurrency(_active_workers);
1614  _workers->run_task(&proc_task_proxy);
1615}
1616
1617class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
1618  typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
1619  EnqueueTask& _enq_task;
1620
1621public:
1622  G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
1623    AbstractGangTask("Enqueue reference objects in parallel"),
1624    _enq_task(enq_task) { }
1625
1626  virtual void work(uint worker_id) {
1627    _enq_task.work(worker_id);
1628  }
1629};
1630
1631void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
1632  assert(_workers != NULL, "Need parallel worker threads.");
1633  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1634
1635  G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
1636
1637  // Not strictly necessary but...
1638  //
1639  // We need to reset the concurrency level before each
1640  // proxy task execution, so that the termination protocol
1641  // and overflow handling in G1CMTask::do_marking_step() know
1642  // how many workers to wait for.
1643  _cm->set_concurrency(_active_workers);
1644  _workers->run_task(&enq_task_proxy);
1645}
1646
1647void G1ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1648  if (has_overflown()) {
1649    // Skip processing the discovered references if we have
1650    // overflown the global marking stack. Reference objects
1651    // only get discovered once so it is OK to not
1652    // de-populate the discovered reference lists. We could have,
1653    // but the only benefit would be that, when marking restarts,
1654    // fewer reference objects are discovered.
1655    return;
1656  }
1657
1658  ResourceMark rm;
1659  HandleMark   hm;
1660
1661  G1CollectedHeap* g1h = G1CollectedHeap::heap();
1662
1663  // Is alive closure.
1664  G1CMIsAliveClosure g1_is_alive(g1h);
1665
1666  // Inner scope to exclude the cleaning of the string and symbol
1667  // tables from the displayed time.
1668  {
1669    GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm);
1670
1671    ReferenceProcessor* rp = g1h->ref_processor_cm();
1672
1673    // See the comment in G1CollectedHeap::ref_processing_init()
1674    // about how reference processing currently works in G1.
1675
1676    // Set the soft reference policy
1677    rp->setup_policy(clear_all_soft_refs);
1678    assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1679
1680    // Instances of the 'Keep Alive' and 'Complete GC' closures used
1681    // in serial reference processing. Note these closures are also
1682    // used for serially processing (by the current thread) the
1683    // JNI references during parallel reference processing.
1684    //
1685    // These closures do not need to synchronize with the worker
1686    // threads involved in parallel reference processing as these
1687    // instances are executed serially by the current thread (i.e. when
1688    // reference processing is not multi-threaded and is thus
1689    // performed by the current thread instead of a gang worker).
1690    //
1691    // The gang tasks involved in parallel reference processing create
1692    // their own instances of these closures, which do their own
1693    // synchronization among themselves.
1694    G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
1695    G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
1696
1697    // We need at least one active thread. If reference processing
1698    // is not multi-threaded we use the current (VMThread) thread,
1699    // otherwise we use the work gang from the G1CollectedHeap and
1700    // we utilize all the worker threads we can.
1701    bool processing_is_mt = rp->processing_is_mt();
1702    uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
1703    active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
1704
1705    // Parallel processing task executor.
1706    G1CMRefProcTaskExecutor par_task_executor(g1h, this,
1707                                              g1h->workers(), active_workers);
1708    AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
1709
1710    // Set the concurrency level. The phase was already set prior to
1711    // executing the remark task.
1712    set_concurrency(active_workers);
1713
1714    // Set the degree of MT processing here.  If the discovery was done MT,
1715    // the number of threads involved during discovery could differ from
1716    // the number of active workers.  This is OK as long as the discovered
1717    // Reference lists are balanced (see balance_all_queues() and balance_queues()).
1718    rp->set_active_mt_degree(active_workers);
1719
1720    // Process the weak references.
1721    const ReferenceProcessorStats& stats =
1722        rp->process_discovered_references(&g1_is_alive,
1723                                          &g1_keep_alive,
1724                                          &g1_drain_mark_stack,
1725                                          executor,
1726                                          _gc_timer_cm);
1727    _gc_tracer_cm->report_gc_reference_stats(stats);
1728
1729    // The do_oop work routines of the keep_alive and drain_marking_stack
1730    // oop closures will set the has_overflown flag if we overflow the
1731    // global marking stack.
1732
1733    assert(has_overflown() || _global_mark_stack.is_empty(),
1734            "Mark stack should be empty (unless it has overflown)");
1735
1736    assert(rp->num_q() == active_workers, "why not");
1737
1738    rp->enqueue_discovered_references(executor);
1739
1740    rp->verify_no_references_recorded();
1741    assert(!rp->discovery_enabled(), "Post condition");
1742  }
1743
1744  if (has_overflown()) {
1745    // We cannot trust g1_is_alive if the marking stack overflowed.
1746    return;
1747  }
1748
1749  assert(_global_mark_stack.is_empty(), "Marking should have completed");
1750
1751  // Unload Klasses, String, Symbols, Code Cache, etc.
1752  if (ClassUnloadingWithConcurrentMark) {
1753    GCTraceTime(Debug, gc, phases) debug("Class Unloading", _gc_timer_cm);
1754    bool purged_classes = SystemDictionary::do_unloading(&g1_is_alive, _gc_timer_cm, false /* Defer cleaning */);
1755    g1h->complete_cleaning(&g1_is_alive, purged_classes);
1756  } else {
1757    GCTraceTime(Debug, gc, phases) debug("Cleanup", _gc_timer_cm);
1758    // No need to clean string table and symbol table as they are treated as strong roots when
1759    // class unloading is disabled.
1760    g1h->partial_cleaning(&g1_is_alive, false, false, G1StringDedup::is_enabled());
1761
1762  }
1763}
1764
1765void G1ConcurrentMark::swapMarkBitMaps() {
1766  G1CMBitMapRO* temp = _prevMarkBitMap;
1767  _prevMarkBitMap    = (G1CMBitMapRO*)_nextMarkBitMap;
1768  _nextMarkBitMap    = (G1CMBitMap*)  temp;
1769}
1770
1771// Closure for marking entries in SATB buffers.
1772class G1CMSATBBufferClosure : public SATBBufferClosure {
1773private:
1774  G1CMTask* _task;
1775  G1CollectedHeap* _g1h;
1776
1777  // This is very similar to G1CMTask::deal_with_reference, but with
1778  // more relaxed requirements for the argument, so this must be more
1779  // circumspect about treating the argument as an object.
1780  void do_entry(void* entry) const {
1781    _task->increment_refs_reached();
1782    HeapRegion* hr = _g1h->heap_region_containing(entry);
1783    if (entry < hr->next_top_at_mark_start()) {
1784      // Until we get here, we don't know whether entry refers to a valid
1785      // object; it could instead have been a stale reference.
1786      oop obj = static_cast<oop>(entry);
1787      assert(obj->is_oop(true /* ignore mark word */),
1788             "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
1789      _task->make_reference_grey(obj);
1790    }
1791  }
1792
1793public:
1794  G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1795    : _task(task), _g1h(g1h) { }
1796
1797  virtual void do_buffer(void** buffer, size_t size) {
1798    for (size_t i = 0; i < size; ++i) {
1799      do_entry(buffer[i]);
1800    }
1801  }
1802};
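
// Note on the NTAMS filter in do_entry() above: objects allocated at or above
// next_top_at_mark_start() during the current cycle are treated as implicitly
// live under SATB, so such entries never need an explicit mark and are skipped.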
1803
1804class G1RemarkThreadsClosure : public ThreadClosure {
1805  G1CMSATBBufferClosure _cm_satb_cl;
1806  G1CMOopClosure _cm_cl;
1807  MarkingCodeBlobClosure _code_cl;
1808  int _thread_parity;
1809
1810 public:
1811  G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1812    _cm_satb_cl(task, g1h),
1813    _cm_cl(g1h, g1h->concurrent_mark(), task),
1814    _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
1815    _thread_parity(Threads::thread_claim_parity()) {}
1816
1817  void do_thread(Thread* thread) {
1818    if (thread->is_Java_thread()) {
1819      if (thread->claim_oops_do(true, _thread_parity)) {
1820        JavaThread* jt = (JavaThread*)thread;
1821
1822        // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
1823        // however, oops reachable from nmethods have very complex lifecycles:
1824        // * Alive if on the stack of an executing method
1825        // * Weakly reachable otherwise
1826        // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
1827        // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
1828        jt->nmethods_do(&_code_cl);
1829
1830        jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
1831      }
1832    } else if (thread->is_VM_thread()) {
1833      if (thread->claim_oops_do(true, _thread_parity)) {
1834        JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
1835      }
1836    }
1837  }
1838};
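
// Each G1CMRemarkTask worker (below) runs its own G1RemarkThreadsClosure over
// all threads; claim_oops_do() with the current claim parity guarantees that
// every JavaThread, and the VM thread's shared SATB queue, is processed by
// exactly one worker.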
1839
1840class G1CMRemarkTask: public AbstractGangTask {
1841private:
1842  G1ConcurrentMark* _cm;
1843public:
1844  void work(uint worker_id) {
1845    // Since all available tasks are actually started, we should
1846    // only proceed if we're supposed to be active.
1847    if (worker_id < _cm->active_tasks()) {
1848      G1CMTask* task = _cm->task(worker_id);
1849      task->record_start_time();
1850      {
1851        ResourceMark rm;
1852        HandleMark hm;
1853
1854        G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1855        Threads::threads_do(&threads_f);
1856      }
1857
1858      do {
1859        task->do_marking_step(1000000000.0 /* something very large */,
1860                              true         /* do_termination       */,
1861                              false        /* is_serial            */);
1862      } while (task->has_aborted() && !_cm->has_overflown());
1863      // If we overflow, then we do not want to restart. We instead
1864      // want to abort remark and do concurrent marking again.
1865      task->record_end_time();
1866    }
1867  }
1868
1869  G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1870    AbstractGangTask("Par Remark"), _cm(cm) {
1871    _cm->terminator()->reset_for_reuse(active_workers);
1872  }
1873};
1874
1875void G1ConcurrentMark::checkpointRootsFinalWork() {
1876  ResourceMark rm;
1877  HandleMark   hm;
1878  G1CollectedHeap* g1h = G1CollectedHeap::heap();
1879
1880  GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm);
1881
1882  g1h->ensure_parsability(false);
1883
1884  // this is remark, so we'll use up all active threads
1885  uint active_workers = g1h->workers()->active_workers();
1886  set_concurrency_and_phase(active_workers, false /* concurrent */);
1887  // Leave _parallel_marking_threads at its
1888  // value originally calculated in the G1ConcurrentMark
1889  // constructor and pass the number of active workers
1890  // through the gang in the task.
1891
1892  {
1893    StrongRootsScope srs(active_workers);
1894
1895    G1CMRemarkTask remarkTask(this, active_workers);
1896    // We will start all available threads, even if we decide that the
1897    // active_workers will be fewer. The extra ones will just bail out
1898    // immediately.
1899    g1h->workers()->run_task(&remarkTask);
1900  }
1901
1902  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1903  guarantee(has_overflown() ||
1904            satb_mq_set.completed_buffers_num() == 0,
1905            "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT,
1906            BOOL_TO_STR(has_overflown()),
1907            satb_mq_set.completed_buffers_num());
1908
1909  print_stats();
1910}
1911
1912void G1ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
1913  // Note we are overriding the read-only view of the prev map here, via
1914  // the cast.
1915  ((G1CMBitMap*)_prevMarkBitMap)->clear_range(mr);
1916}
1917
1918HeapRegion*
1919G1ConcurrentMark::claim_region(uint worker_id) {
1920  // "checkpoint" the finger
1921  HeapWord* finger = _finger;
1922
1923  // _heap_end will not change underneath our feet; it only changes at
1924  // yield points.
1925  while (finger < _heap_end) {
1926    assert(_g1h->is_in_g1_reserved(finger), "invariant");
1927
1928    HeapRegion* curr_region = _g1h->heap_region_containing(finger);
1929    // Make sure that the reads below do not float before loading curr_region.
1930    OrderAccess::loadload();
1931    // Above, heap_region_containing may return NULL as we always scan and claim
1932    // up to the end of the heap. In this case, just jump to the next region.
1933    HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
1934
1935    // Is the gap between reading the finger and doing the CAS too long?
1936    HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
1937    if (res == finger && curr_region != NULL) {
1938      // we succeeded
1939      HeapWord*   bottom        = curr_region->bottom();
1940      HeapWord*   limit         = curr_region->next_top_at_mark_start();
1941
1942      // Notice that _finger == end cannot be guaranteed here since
1943      // someone else might have moved the finger even further.
1944      assert(_finger >= end, "the finger should have moved forward");
1945
1946      if (limit > bottom) {
1947        return curr_region;
1948      } else {
1949        assert(limit == bottom,
1950               "the region limit should be at bottom");
1951        // we return NULL and the caller should try calling
1952        // claim_region() again.
1953        return NULL;
1954      }
1955    } else {
1956      assert(_finger > finger, "the finger should have moved forward");
1957      // read it again
1958      finger = _finger;
1959    }
1960  }
1961
1962  return NULL;
1963}
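
// Sketch of the claim protocol above (descriptive only): two workers may read
// the same _finger value and race on the cmpxchg with the same 'end'. Only the
// worker whose cmpxchg returns the old finger wins the region (when curr_region
// is non-NULL); a loser observes that _finger has moved, re-reads it and
// retries from the next candidate region.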
1964
1965#ifndef PRODUCT
1966class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
1967private:
1968  G1CollectedHeap* _g1h;
1969  const char* _phase;
1970  int _info;
1971
1972public:
1973  VerifyNoCSetOops(const char* phase, int info = -1) :
1974    _g1h(G1CollectedHeap::heap()),
1975    _phase(phase),
1976    _info(info)
1977  { }
1978
1979  void operator()(G1TaskQueueEntry task_entry) const {
1980    if (task_entry.is_array_slice()) {
1981      guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice()));
1982      return;
1983    }
1984    guarantee(task_entry.obj()->is_oop(),
1985              "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
1986              p2i(task_entry.obj()), _phase, _info);
1987    guarantee(!_g1h->is_in_cset(task_entry.obj()),
1988              "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
1989              p2i(task_entry.obj()), _phase, _info);
1990  }
1991};
1992
1993void G1ConcurrentMark::verify_no_cset_oops() {
1994  assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
1995  if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
1996    return;
1997  }
1998
1999  // Verify entries on the global mark stack
2000  _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
2001
2002  // Verify entries on the task queues
2003  for (uint i = 0; i < _max_worker_id; ++i) {
2004    G1CMTaskQueue* queue = _task_queues->queue(i);
2005    queue->iterate(VerifyNoCSetOops("Queue", i));
2006  }
2007
2008  // Verify the global finger
2009  HeapWord* global_finger = finger();
2010  if (global_finger != NULL && global_finger < _heap_end) {
2011    // Since we always iterate over all regions, we might get a NULL HeapRegion
2012    // here.
2013    HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
2014    guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
2015              "global finger: " PTR_FORMAT " region: " HR_FORMAT,
2016              p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
2017  }
2018
2019  // Verify the task fingers
2020  assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2021  for (uint i = 0; i < parallel_marking_threads(); ++i) {
2022    G1CMTask* task = _tasks[i];
2023    HeapWord* task_finger = task->finger();
2024    if (task_finger != NULL && task_finger < _heap_end) {
2025      // See above note on the global finger verification.
2026      HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
2027      guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2028                !task_hr->in_collection_set(),
2029                "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2030                p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
2031    }
2032  }
2033}
2034#endif // PRODUCT
2035void G1ConcurrentMark::create_live_data() {
2036  _g1h->g1_rem_set()->create_card_live_data(_parallel_workers, _nextMarkBitMap);
2037}
2038
2039void G1ConcurrentMark::finalize_live_data() {
2040  _g1h->g1_rem_set()->finalize_card_live_data(_g1h->workers(), _nextMarkBitMap);
2041}
2042
2043void G1ConcurrentMark::verify_live_data() {
2044  _g1h->g1_rem_set()->verify_card_live_data(_g1h->workers(), _nextMarkBitMap);
2045}
2046
2047void G1ConcurrentMark::clear_live_data(WorkGang* workers) {
2048  _g1h->g1_rem_set()->clear_card_live_data(workers);
2049}
2050
2051#ifdef ASSERT
2052void G1ConcurrentMark::verify_live_data_clear() {
2053  _g1h->g1_rem_set()->verify_card_live_data_is_clear();
2054}
2055#endif
2056
2057void G1ConcurrentMark::print_stats() {
2058  if (!log_is_enabled(Debug, gc, stats)) {
2059    return;
2060  }
2061  log_debug(gc, stats)("---------------------------------------------------------------------");
2062  for (size_t i = 0; i < _active_tasks; ++i) {
2063    _tasks[i]->print_stats();
2064    log_debug(gc, stats)("---------------------------------------------------------------------");
2065  }
2066}
2067
2068void G1ConcurrentMark::abort() {
2069  if (!cmThread()->during_cycle() || _has_aborted) {
2070    // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
2071    return;
2072  }
2073
2074  // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2075  // concurrent bitmap clearing.
2076  {
2077    GCTraceTime(Debug, gc)("Clear Next Bitmap");
2078    clear_bitmap(_nextMarkBitMap, _g1h->workers(), false);
2079  }
2080  // Note we cannot clear the previous marking bitmap here
2081  // since VerifyDuringGC verifies the objects marked during
2082  // a full GC against the previous bitmap.
2083
2084  {
2085    GCTraceTime(Debug, gc)("Clear Live Data");
2086    clear_live_data(_g1h->workers());
2087  }
2088  DEBUG_ONLY({
2089    GCTraceTime(Debug, gc)("Verify Live Data Clear");
2090    verify_live_data_clear();
2091  })
2092  // Empty mark stack
2093  reset_marking_state();
2094  for (uint i = 0; i < _max_worker_id; ++i) {
2095    _tasks[i]->clear_region_fields();
2096  }
2097  _first_overflow_barrier_sync.abort();
2098  _second_overflow_barrier_sync.abort();
2099  _has_aborted = true;
2100
2101  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2102  satb_mq_set.abandon_partial_marking();
2103  // This can be called either during or outside marking; we'll read
2104  // the expected_active value from the SATB queue set.
2105  satb_mq_set.set_active_all_threads(
2106                                 false, /* new active value */
2107                                 satb_mq_set.is_active() /* expected_active */);
2108}
2109
2110static void print_ms_time_info(const char* prefix, const char* name,
2111                               NumberSeq& ns) {
2112  log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2113                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2114  if (ns.num() > 0) {
2115    log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2116                           prefix, ns.sd(), ns.maximum());
2117  }
2118}
2119
2120void G1ConcurrentMark::print_summary_info() {
2121  Log(gc, marking) log;
2122  if (!log.is_trace()) {
2123    return;
2124  }
2125
2126  log.trace(" Concurrent marking:");
2127  print_ms_time_info("  ", "init marks", _init_times);
2128  print_ms_time_info("  ", "remarks", _remark_times);
2129  {
2130    print_ms_time_info("     ", "final marks", _remark_mark_times);
2131    print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2132
2133  }
2134  print_ms_time_info("  ", "cleanups", _cleanup_times);
2135  log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2136            _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2137  if (G1ScrubRemSets) {
2138    log.trace("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2139              _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2140  }
2141  log.trace("  Total stop_world time = %8.2f s.",
2142            (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
2143  log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
2144            cmThread()->vtime_accum(), cmThread()->vtime_mark_accum());
2145}
2146
2147void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2148  _parallel_workers->print_worker_threads_on(st);
2149}
2150
2151void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
2152  _parallel_workers->threads_do(tc);
2153}
2154
2155void G1ConcurrentMark::print_on_error(outputStream* st) const {
2156  st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
2157      p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
2158  _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
2159  _nextMarkBitMap->print_on_error(st, " Next Bits: ");
2160}
2161
2162// Closure for iteration over bitmaps
2163class G1CMBitMapClosure : public BitMapClosure {
2164private:
2165  // the bitmap that is being iterated over
2166  G1CMBitMap*                 _nextMarkBitMap;
2167  G1ConcurrentMark*           _cm;
2168  G1CMTask*                   _task;
2169
2170public:
2171  G1CMBitMapClosure(G1CMTask *task, G1ConcurrentMark* cm, G1CMBitMap* nextMarkBitMap) :
2172    _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2173
2174  bool do_bit(size_t offset) {
2175    HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2176    assert(_nextMarkBitMap->isMarked(addr), "invariant");
2177    assert( addr < _cm->finger(), "invariant");
2178    assert(addr >= _task->finger(), "invariant");
2179
2180    // We move that task's local finger along.
2181    _task->move_finger_to(addr);
2182
2183    _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr)));
2184    // we only partially drain the local queue and global stack
2185    _task->drain_local_queue(true);
2186    _task->drain_global_stack(true);
2187
2188    // if the has_aborted flag has been raised, we need to bail out of
2189    // the iteration
2190    return !_task->has_aborted();
2191  }
2192};
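
// Returning false from do_bit() above is what aborts the
// _nextMarkBitMap->iterate() call in G1CMTask::do_marking_step() below; since
// the task's local finger was already advanced to the current object, the
// iteration can later resume from (roughly) where it left off.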
2193
2194static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2195  ReferenceProcessor* result = g1h->ref_processor_cm();
2196  assert(result != NULL, "CM reference processor should not be NULL");
2197  return result;
2198}
2199
2200G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2201                               G1ConcurrentMark* cm,
2202                               G1CMTask* task)
2203  : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2204    _g1h(g1h), _cm(cm), _task(task)
2205{ }
2206
2207void G1CMTask::setup_for_region(HeapRegion* hr) {
2208  assert(hr != NULL,
2209        "claim_region() should have filtered out NULL regions");
2210  _curr_region  = hr;
2211  _finger       = hr->bottom();
2212  update_region_limit();
2213}
2214
2215void G1CMTask::update_region_limit() {
2216  HeapRegion* hr            = _curr_region;
2217  HeapWord* bottom          = hr->bottom();
2218  HeapWord* limit           = hr->next_top_at_mark_start();
2219
2220  if (limit == bottom) {
2221    // The region was collected underneath our feet.
2222    // We set the finger to bottom to ensure that the bitmap
2223    // iteration that will follow this will not do anything.
2224    // (this is not a condition that holds when we set the region up,
2225    // as the region is not supposed to be empty in the first place)
2226    _finger = bottom;
2227  } else if (limit >= _region_limit) {
2228    assert(limit >= _finger, "peace of mind");
2229  } else {
2230    assert(limit < _region_limit, "only way to get here");
2231    // This can happen under some pretty unusual circumstances.  An
2232    // evacuation pause empties the region underneath our feet (NTAMS
2233    // at bottom). We then do some allocation in the region (NTAMS
2234    // stays at bottom), followed by the region being used as a GC
2235    // alloc region (NTAMS will move to top() and the objects
2236    // originally below it will be grayed). All objects now marked in
2237    // the region are explicitly grayed, if below the global finger,
2238    // and in fact we do not need to scan anything else. So, we simply
2239    // set _finger to be limit to ensure that the bitmap iteration
2240    // doesn't do anything.
2241    _finger = limit;
2242  }
2243
2244  _region_limit = limit;
2245}
2246
2247void G1CMTask::giveup_current_region() {
2248  assert(_curr_region != NULL, "invariant");
2249  clear_region_fields();
2250}
2251
2252void G1CMTask::clear_region_fields() {
2253  // Values for these three fields that indicate that we're not
2254  // holding on to a region.
2255  _curr_region   = NULL;
2256  _finger        = NULL;
2257  _region_limit  = NULL;
2258}
2259
2260void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2261  if (cm_oop_closure == NULL) {
2262    assert(_cm_oop_closure != NULL, "invariant");
2263  } else {
2264    assert(_cm_oop_closure == NULL, "invariant");
2265  }
2266  _cm_oop_closure = cm_oop_closure;
2267}
2268
2269void G1CMTask::reset(G1CMBitMap* nextMarkBitMap) {
2270  guarantee(nextMarkBitMap != NULL, "invariant");
2271  _nextMarkBitMap                = nextMarkBitMap;
2272  clear_region_fields();
2273
2274  _calls                         = 0;
2275  _elapsed_time_ms               = 0.0;
2276  _termination_time_ms           = 0.0;
2277  _termination_start_time_ms     = 0.0;
2278}
2279
2280bool G1CMTask::should_exit_termination() {
2281  regular_clock_call();
2282  // This is called when we are in the termination protocol. We should
2283  // quit if, for some reason, this task wants to abort or the global
2284  // stack is not empty (this means that we can get work from it).
2285  return !_cm->mark_stack_empty() || has_aborted();
2286}
2287
2288void G1CMTask::reached_limit() {
2289  assert(_words_scanned >= _words_scanned_limit ||
2290         _refs_reached >= _refs_reached_limit ,
2291         "shouldn't have been called otherwise");
2292  regular_clock_call();
2293}
2294
2295void G1CMTask::regular_clock_call() {
2296  if (has_aborted()) return;
2297
2298  // First, we need to recalculate the words scanned and refs reached
2299  // limits for the next clock call.
2300  recalculate_limits();
2301
2302  // During the regular clock call we do the following
2303
2304  // (1) If an overflow has been flagged, then we abort.
2305  if (_cm->has_overflown()) {
2306    set_has_aborted();
2307    return;
2308  }
2309
2310  // If we are not concurrent (i.e. we're doing remark) we don't need
2311  // to check anything else. The other steps are only needed during
2312  // the concurrent marking phase.
2313  if (!concurrent()) return;
2314
2315  // (2) If marking has been aborted for Full GC, then we also abort.
2316  if (_cm->has_aborted()) {
2317    set_has_aborted();
2318    return;
2319  }
2320
2321  double curr_time_ms = os::elapsedVTime() * 1000.0;
2322
2323  // (3) We check whether we should yield. If we have to, then we abort.
2324  if (SuspendibleThreadSet::should_yield()) {
2325    // We should yield. To do this we abort the task. The caller is
2326    // responsible for yielding.
2327    set_has_aborted();
2328    return;
2329  }
2330
2331  // (4) We check whether we've reached our time quota. If we have,
2332  // then we abort.
2333  double elapsed_time_ms = curr_time_ms - _start_time_ms;
2334  if (elapsed_time_ms > _time_target_ms) {
2335    set_has_aborted();
2336    _has_timed_out = true;
2337    return;
2338  }
2339
2340  // (5) Finally, we check whether there are enough completed SATB
2341  // buffers available for processing. If there are, we abort.
2342  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2343  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2344    // we do need to process SATB buffers, we'll abort and restart
2345    // the marking task to do so
2346    set_has_aborted();
2347    return;
2348  }
2349}
2350
2351void G1CMTask::recalculate_limits() {
2352  _real_words_scanned_limit = _words_scanned + words_scanned_period;
2353  _words_scanned_limit      = _real_words_scanned_limit;
2354
2355  _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2356  _refs_reached_limit       = _real_refs_reached_limit;
2357}
2358
2359void G1CMTask::decrease_limits() {
2360  // This is called when we believe that we're going to do an infrequent
2361  // operation which will increase the per byte scanned cost (i.e. move
2362  // entries to/from the global stack). It basically tries to decrease the
2363  // scanning limit so that the clock is called earlier.
2364
2365  _words_scanned_limit = _real_words_scanned_limit -
2366    3 * words_scanned_period / 4;
2367  _refs_reached_limit  = _real_refs_reached_limit -
2368    3 * refs_reached_period / 4;
2369}
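
// Arithmetic consequence of the 3/4 reduction above: recalculate_limits() sets
// each limit to (value at recalculation + one period), so after
// decrease_limits() the next regular_clock_call() via reached_limit() is due
// once only a quarter of the usual period has been scanned/reached, unless that
// point has already been passed.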
2370
2371void G1CMTask::move_entries_to_global_stack() {
2372  // Local array where we'll store the entries that will be popped
2373  // from the local queue.
2374  G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2375
2376  size_t n = 0;
2377  G1TaskQueueEntry task_entry;
2378  while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2379    buffer[n] = task_entry;
2380    ++n;
2381  }
2382  if (n < G1CMMarkStack::EntriesPerChunk) {
2383    buffer[n] = G1TaskQueueEntry();
2384  }
2385
2386  if (n > 0) {
2387    if (!_cm->mark_stack_push(buffer)) {
2388      set_has_aborted();
2389    }
2390  }
2391
2392  // This operation was quite expensive, so decrease the limits.
2393  decrease_limits();
2394}
2395
2396bool G1CMTask::get_entries_from_global_stack() {
2397  // Local array where we'll store the entries that will be popped
2398  // from the global stack.
2399  G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2400
2401  if (!_cm->mark_stack_pop(buffer)) {
2402    return false;
2403  }
2404
2405  // We did actually pop at least one entry.
2406  for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2407    G1TaskQueueEntry task_entry = buffer[i];
2408    if (task_entry.is_null()) {
2409      break;
2410    }
2411    assert(task_entry.is_array_slice() || task_entry.obj()->is_oop(), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj()));
2412    bool success = _task_queue->push(task_entry);
2413    // We only call this when the local queue is empty or under a
2414    // given target limit. So, we do not expect this push to fail.
2415    assert(success, "invariant");
2416  }
2417
2418  // This operation was quite expensive, so decrease the limits
2419  decrease_limits();
2420  return true;
2421}
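
// Both transfer routines above move entries in fixed-size chunks of
// G1CMMarkStack::EntriesPerChunk. A partially filled chunk is terminated with a
// null G1TaskQueueEntry, which is why the pop loop stops at the first is_null()
// entry instead of always consuming EntriesPerChunk elements.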
2422
2423void G1CMTask::drain_local_queue(bool partially) {
2424  if (has_aborted()) {
2425    return;
2426  }
2427
2428  // Decide what the target size is, depending on whether we're going to
2429  // drain it partially (so that other tasks can steal if they run out
2430  // of things to do) or totally (at the very end).
2431  size_t target_size;
2432  if (partially) {
2433    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2434  } else {
2435    target_size = 0;
2436  }
2437
2438  if (_task_queue->size() > target_size) {
2439    G1TaskQueueEntry entry;
2440    bool ret = _task_queue->pop_local(entry);
2441    while (ret) {
2442      scan_task_entry(entry);
2443      if (_task_queue->size() <= target_size || has_aborted()) {
2444        ret = false;
2445      } else {
2446        ret = _task_queue->pop_local(entry);
2447      }
2448    }
2449  }
2450}
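
// For a partial drain the queue is only reduced to
// MIN2(max_elems / 3, GCDrainStackTargetSize), deliberately leaving entries
// behind so other tasks still have something to steal; a full drain
// (target_size == 0) is only requested at the very end of do_marking_step().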
2451
2452void G1CMTask::drain_global_stack(bool partially) {
2453  if (has_aborted()) return;
2454
2455  // We have a policy to drain the local queue before we attempt to
2456  // drain the global stack.
2457  assert(partially || _task_queue->size() == 0, "invariant");
2458
2459  // Decide what the target size is, depending on whether we're going to
2460  // drain it partially (so that other tasks can steal if they run out
2461  // of things to do) or totally (at the very end).
2462  // Notice that when draining the global mark stack partially, due to the raciness
2463  // of the mark stack size update we might in fact drop below the target. But,
2464  // this is not a problem.
2465  // In case of total draining, we simply process until the global mark stack is
2466  // totally empty, disregarding the size counter.
2467  if (partially) {
2468    size_t const target_size = _cm->partial_mark_stack_size_target();
2469    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2470      if (get_entries_from_global_stack()) {
2471        drain_local_queue(partially);
2472      }
2473    }
2474  } else {
2475    while (!has_aborted() && get_entries_from_global_stack()) {
2476      drain_local_queue(partially);
2477    }
2478  }
2479}
2480
2481// The SATB queue code makes several assumptions about whether to call the par or
2482// non-par versions of its methods. This is why some of the code is
2483// replicated. We should really get rid of the single-threaded version
2484// of the code to simplify things.
2485void G1CMTask::drain_satb_buffers() {
2486  if (has_aborted()) return;
2487
2488  // We set this so that the regular clock knows that we're in the
2489  // middle of draining buffers and doesn't set the abort flag when it
2490  // notices that SATB buffers are available for draining. It'd be
2491  // very counterproductive if it did that. :-)
2492  _draining_satb_buffers = true;
2493
2494  G1CMSATBBufferClosure satb_cl(this, _g1h);
2495  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2496
2497  // This keeps claiming and applying the closure to completed buffers
2498  // until we run out of buffers or we need to abort.
2499  while (!has_aborted() &&
2500         satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2501    regular_clock_call();
2502  }
2503
2504  _draining_satb_buffers = false;
2505
2506  assert(has_aborted() ||
2507         concurrent() ||
2508         satb_mq_set.completed_buffers_num() == 0, "invariant");
2509
2510  // Again, this was a potentially expensive operation, so decrease the
2511  // limits to get the regular clock call early.
2512  decrease_limits();
2513}
2514
2515void G1CMTask::print_stats() {
2516  log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
2517                       _worker_id, _calls);
2518  log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2519                       _elapsed_time_ms, _termination_time_ms);
2520  log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
2521                       _step_times_ms.num(), _step_times_ms.avg(),
2522                       _step_times_ms.sd());
2523  log_debug(gc, stats)("                    max = %1.2lfms, total = %1.2lfms",
2524                       _step_times_ms.maximum(), _step_times_ms.sum());
2525}
2526
2527bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) {
2528  return _task_queues->steal(worker_id, hash_seed, task_entry);
2529}
2530
2531/*****************************************************************************
2532
2533    The do_marking_step(time_target_ms, ...) method is the building
2534    block of the parallel marking framework. It can be called in parallel
2535    with other invocations of do_marking_step() on different tasks
2536    (but only one per task, obviously) and concurrently with the
2537    mutator threads, or during remark, hence it eliminates the need
2538    for two versions of the code. When called during remark, it will
2539    pick up from where the task left off during the concurrent marking
2540    phase. Interestingly, tasks are also claimable during evacuation
2541    pauses, since do_marking_step() ensures that it aborts before
2542    it needs to yield.
2543
2544    The data structures that it uses to do marking work are the
2545    following:
2546
2547      (1) Marking Bitmap. If there are gray objects that appear only
2548      on the bitmap (this happens either when dealing with an overflow
2549      or when the initial marking phase has simply marked the roots
2550      and didn't push them on the stack), then tasks claim heap
2551      regions whose bitmap they then scan to find gray objects. A
2552      global finger indicates where the end of the last claimed region
2553      is. A local finger indicates how far into the region a task has
2554      scanned. The two fingers are used to determine how to gray an
2555      object (i.e. whether simply marking it is OK, as it will be
2556      visited by a task in the future, or whether it also needs to be
2557      pushed on a stack).
2558
2559      (2) Local Queue. The local queue of the task which is accessed
2560      reasonably efficiently by the task. Other tasks can steal from
2561      it when they run out of work. Throughout the marking phase, a
2562      task attempts to keep its local queue short but not totally
2563      empty, so that entries are available for stealing by other
2564      tasks. Only when there is no more work will a task totally
2565      drain its local queue.
2566
2567      (3) Global Mark Stack. This handles local queue overflow. During
2568      marking only sets of entries are moved between it and the local
2569      queues, as access to it requires a mutex and more fine-grained
2570      interaction with it, which might cause contention. If it
2571      overflows, then the marking phase should restart and iterate
2572      over the bitmap to identify gray objects. Throughout the marking
2573      phase, tasks attempt to keep the global mark stack at a small
2574      length but not totally empty, so that entries are available for
2575      popping by other tasks. Only when there is no more work will
2576      tasks totally drain the global mark stack.
2577
2578      (4) SATB Buffer Queue. This is where completed SATB buffers are
2579      made available. Buffers are regularly removed from this queue
2580      and scanned for roots, so that the queue doesn't get too
2581      long. During remark, all completed buffers are processed, as
2582      well as the filled in parts of any uncompleted buffers.
2583
2584    The do_marking_step() method tries to abort when the time target
2585    has been reached. There are a few other cases when the
2586    do_marking_step() method also aborts:
2587
2588      (1) When the marking phase has been aborted (after a Full GC).
2589
2590      (2) When a global overflow (on the global stack) has been
2591      triggered. Before the task aborts, it will actually sync up with
2592      the other tasks to ensure that all the marking data structures
2593      (local queues, stacks, fingers etc.)  are re-initialized so that
2594      when do_marking_step() completes, the marking phase can
2595      immediately restart.
2596
2597      (3) When enough completed SATB buffers are available. The
2598      do_marking_step() method only tries to drain SATB buffers right
2599      at the beginning. So, if enough buffers are available, the
2600      marking step aborts and the SATB buffers are processed at
2601      the beginning of the next invocation.
2602
2603      (4) To yield. When we have to yield then we abort and yield
2604      right at the end of do_marking_step(). This saves us from a lot
2605      of hassle as, by yielding, we might allow a Full GC. If this
2606      happens then objects will be compacted underneath our feet, the
2607      heap might shrink, etc. We save checking for this by just
2608      aborting and doing the yield right at the end.
2609
2610    From the above it follows that the do_marking_step() method should
2611    be called in a loop (or, otherwise, regularly) until it completes.
2612
2613    If a marking step completes without its has_aborted() flag being
2614    true, it means it has completed the current marking phase (and
2615    also all other marking tasks have done so and have all synced up).
2616
2617    A method called regular_clock_call() is invoked "regularly" (in
2618    sub-ms intervals) throughout marking. It is this clock method that
2619    checks all the abort conditions which were mentioned above and
2620    decides when the task should abort. A work-based scheme is used to
2621    trigger this clock method: when the number of object words the
2622    marking phase has scanned or the number of references the marking
2623    phase has visited reaches a given limit. Additional invocations of
2624    the clock method have been planted in a few other strategic places
2625    too. The initial reason for the clock method was to avoid calling
2626    vtime too regularly, as it is quite expensive. So, once it was in
2627    place, it was natural to piggy-back all the other conditions on it
2628    too and not constantly check them throughout the code.
2629
2630    If do_termination is true then do_marking_step will enter its
2631    termination protocol.
2632
2633    The value of is_serial must be true when do_marking_step is being
2634    called serially (i.e. by the VMThread) and do_marking_step should
2635    skip any synchronization in the termination and overflow code.
2636    Examples include the serial remark code and the serial reference
2637    processing closures.
2638
2639    The value of is_serial must be false when do_marking_step is
2640    being called by any of the worker threads in a work gang.
2641    Examples include the concurrent marking code (CMMarkingTask),
2642    the MT remark code, and the MT reference processing closures.
2643
2644 *****************************************************************************/
2645
2646void G1CMTask::do_marking_step(double time_target_ms,
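// A minimal sketch of the calling pattern described above, as used by
// G1CMRemarkTask and the reference processing closures earlier in this file
// (illustrative only; 'task' and 'cm' stand for a G1CMTask* and the
// G1ConcurrentMark* it belongs to):
//
//   do {
//     task->do_marking_step(time_target_ms,
//                           true  /* do_termination */,
//                           false /* is_serial */);
//   } while (task->has_aborted() && !cm->has_overflown());
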
2647                               bool do_termination,
2648                               bool is_serial) {
2649  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2650  assert(concurrent() == _cm->concurrent(), "they should be the same");
2651
2652  G1Policy* g1_policy = _g1h->g1_policy();
2653  assert(_task_queues != NULL, "invariant");
2654  assert(_task_queue != NULL, "invariant");
2655  assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
2656
2657  assert(!_claimed,
2658         "only one thread should claim this task at any one time");
2659
2660  // OK, this doesn't safeguard against all possible scenarios, as it is
2661  // possible for two threads to set the _claimed flag at the same
2662  // time. But it is only for debugging purposes anyway and it will
2663  // catch most problems.
2664  _claimed = true;
2665
2666  _start_time_ms = os::elapsedVTime() * 1000.0;
2667
2668  // If do_stealing is true then do_marking_step will attempt to
2669  // steal work from the other G1CMTasks. It only makes sense to
2670  // enable stealing when the termination protocol is enabled
2671  // and do_marking_step() is not being called serially.
2672  bool do_stealing = do_termination && !is_serial;
2673
2674  double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
2675  _time_target_ms = time_target_ms - diff_prediction_ms;
2676
2677  // set up the variables that are used in the work-based scheme to
2678  // call the regular clock method
2679  _words_scanned = 0;
2680  _refs_reached  = 0;
2681  recalculate_limits();
2682
2683  // clear all flags
2684  clear_has_aborted();
2685  _has_timed_out = false;
2686  _draining_satb_buffers = false;
2687
2688  ++_calls;
2689
2690  // Set up the bitmap and oop closures. Anything that uses them is
2691  // eventually called from this method, so it is OK to allocate these
2692  // locally, on this method's stack.
2693  G1CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
2694  G1CMOopClosure    cm_oop_closure(_g1h, _cm, this);
2695  set_cm_oop_closure(&cm_oop_closure);
2696
2697  if (_cm->has_overflown()) {
2698    // This can happen if the mark stack overflows during a GC pause
2699    // and this task, after a yield point, restarts. We have to abort
2700    // as we need to get into the overflow protocol which happens
2701    // right at the end of this task.
2702    set_has_aborted();
2703  }
2704
2705  // First drain any available SATB buffers. After this, we will not
2706  // look at SATB buffers before the next invocation of this method.
2707  // If enough completed SATB buffers are queued up, the regular clock
2708  // will abort this task so that it restarts.
2709  drain_satb_buffers();
2710  // ...then partially drain the local queue and the global stack
2711  drain_local_queue(true);
2712  drain_global_stack(true);
2713
2714  do {
2715    if (!has_aborted() && _curr_region != NULL) {
2716      // This means that we're already holding on to a region.
2717      assert(_finger != NULL, "if region is not NULL, then the finger "
2718             "should not be NULL either");
2719
2720      // We might have restarted this task after an evacuation pause
2721      // which might have evacuated the region we're holding on to
2722      // underneath our feet. Let's read its limit again to make sure
2723      // that we do not iterate over a region of the heap that
2724      // contains garbage (update_region_limit() will also move
2725      // _finger to the start of the region if it is found empty).
2726      update_region_limit();
2727      // We will start from _finger not from the start of the region,
2728      // as we might be restarting this task after aborting half-way
2729      // through scanning this region. In this case, _finger points to
2730      // the address where we last found a marked object. If this is a
2731      // fresh region, _finger points to the region's bottom().
2732      MemRegion mr = MemRegion(_finger, _region_limit);
2733
2734      assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2735             "humongous regions should go around loop once only");
2736
2737      // Some special cases:
2738      // If the memory region is empty, we can just give up the region.
2739      // If the current region is humongous then we only need to check
2740      // the bitmap for the bit associated with the start of the object,
2741      // scan the object if it's live, and give up the region.
2742      // Otherwise, let's iterate over the bitmap of the part of the region
2743      // that is left.
2744      // If the iteration is successful, give up the region.
2745      if (mr.is_empty()) {
2746        giveup_current_region();
2747        regular_clock_call();
2748      } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2749        if (_nextMarkBitMap->isMarked(mr.start())) {
2750          // The object is marked - apply the closure
2751          BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
2752          bitmap_closure.do_bit(offset);
2753        }
2754        // Even if this task aborted while scanning the humongous object
2755        // we can (and should) give up the current region.
2756        giveup_current_region();
2757        regular_clock_call();
2758      } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
2759        giveup_current_region();
2760        regular_clock_call();
2761      } else {
2762        assert(has_aborted(), "currently the only way to do so");
2763        // The only way to abort the bitmap iteration is to return
2764        // false from the do_bit() method. However, inside the
2765        // do_bit() method we move the _finger to point to the
2766        // object currently being looked at. So, if we bail out, we
2767        // have definitely set _finger to something non-null.
2768        assert(_finger != NULL, "invariant");
2769
2770        // Region iteration was actually aborted. So now _finger
2771        // points to the address of the object we last scanned. If we
2772        // leave it there, when we restart this task, we will rescan
2773        // the object. It is easy to avoid this. We move the finger by
2774        // enough to point to the next possible object header (the
2775        // bitmap knows by how much we need to move it as it knows its
2776        // granularity).
2777        assert(_finger < _region_limit, "invariant");
2778        HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
2779        // Check if bitmap iteration was aborted while scanning the last object
2780        if (new_finger >= _region_limit) {
2781          giveup_current_region();
2782        } else {
2783          move_finger_to(new_finger);
2784        }
2785      }
2786    }
2787    // At this point we have either completed iterating over the
2788    // region we were holding on to, or we have aborted.
2789
2790    // We then partially drain the local queue and the global stack.
2791    // (Do we really need this?)
2792    drain_local_queue(true);
2793    drain_global_stack(true);
2794
2795    // Read the note on the claim_region() method on why it might
2796    // return NULL with potentially more regions available for
2797    // claiming and why we have to check out_of_regions() to determine
2798    // whether we're done or not.
2799    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
2800      // We are going to try to claim a new region. We should have
2801      // given up on the previous one.
2802      // Separated the asserts so that we know which one fires.
2803      assert(_curr_region  == NULL, "invariant");
2804      assert(_finger       == NULL, "invariant");
2805      assert(_region_limit == NULL, "invariant");
2806      HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2807      if (claimed_region != NULL) {
2808        // Yes, we managed to claim one
2809        setup_for_region(claimed_region);
2810        assert(_curr_region == claimed_region, "invariant");
2811      }
2812      // It is important to call the regular clock here. It might take
2813      // a while to claim a region if, for example, we hit a large
2814      // block of empty regions. So we need to call the regular clock
2815      // method once per iteration of this loop to make sure it's called
2816      // frequently enough.
2817      regular_clock_call();
2818    }
2819
2820    if (!has_aborted() && _curr_region == NULL) {
2821      assert(_cm->out_of_regions(),
2822             "at this point we should be out of regions");
2823    }
2824  } while (_curr_region != NULL && !has_aborted());
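  // The loop above exits either because this task has aborted, or because it
  // holds no region and the claiming loop found nothing more to claim (in
  // which case _cm->out_of_regions() holds, as asserted above).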
2825
2826  if (!has_aborted()) {
2827    // We cannot check whether the global stack is empty, since other
2828    // tasks might be pushing objects to it concurrently.
2829    assert(_cm->out_of_regions(),
2830           "at this point we should be out of regions");
2831    // Try to reduce the number of available SATB buffers so that
2832    // remark has less work to do.
2833    drain_satb_buffers();
2834  }
2835
2836  // Since we've done everything else, we can now totally drain the
2837  // local queue and global stack.
2838  drain_local_queue(false);
2839  drain_global_stack(false);
2840
2841  // Attempt to steal work from other tasks' queues.
2842  if (do_stealing && !has_aborted()) {
2843    // We have not aborted. This means that we have finished all that
2844    // we could. Let's try to do some stealing...
2845
2846    // We cannot check whether the global stack is empty, since other
2847    // tasks might be pushing objects to it concurrently.
2848    assert(_cm->out_of_regions() && _task_queue->size() == 0,
2849           "only way to reach here");
2850    while (!has_aborted()) {
2851      G1TaskQueueEntry entry;
2852      if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) {
2853        scan_task_entry(entry);
2854
2855        // And since we're towards the end, let's totally drain the
2856        // local queue and global stack.
2857        drain_local_queue(false);
2858        drain_global_stack(false);
2859      } else {
2860        break;
2861      }
2862    }
2863  }
2864
2865  // We still haven't aborted. Now, let's try to get into the
2866  // termination protocol.
2867  if (do_termination && !has_aborted()) {
2868    // We cannot check whether the global stack is empty, since other
2869    // tasks might be concurrently pushing objects on it.
2870    // Separated the asserts so that we know which one fires.
2871    assert(_cm->out_of_regions(), "only way to reach here");
2872    assert(_task_queue->size() == 0, "only way to reach here");
2873    _termination_start_time_ms = os::elapsedVTime() * 1000.0;
2874
2875    // The G1CMTask class also extends the TerminatorTerminator class,
2876    // hence its should_exit_termination() method will also decide
2877    // whether to exit the termination protocol or not.
2878    bool finished = (is_serial ||
2879                     _cm->terminator()->offer_termination(this));
2880    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
2881    _termination_time_ms +=
2882      termination_end_time_ms - _termination_start_time_ms;
2883
2884    if (finished) {
2885      // We're all done.
2886
2887      if (_worker_id == 0) {
2888        // let's allow task 0 to do this
2889        if (concurrent()) {
2890          assert(_cm->concurrent_marking_in_progress(), "invariant");
2891          // we need to set this to false before the next
2892          // safepoint. This way we ensure that the marking phase
2893          // doesn't observe any more heap expansions.
2894          _cm->clear_concurrent_marking_in_progress();
2895        }
2896      }
2897
2898      // We can now guarantee that the global stack is empty, since
2899      // all other tasks have finished. We separated the guarantees so
2900      // that, if a condition is false, we can immediately find out
2901      // which one.
2902      guarantee(_cm->out_of_regions(), "only way to reach here");
2903      guarantee(_cm->mark_stack_empty(), "only way to reach here");
2904      guarantee(_task_queue->size() == 0, "only way to reach here");
2905      guarantee(!_cm->has_overflown(), "only way to reach here");
2906    } else {
2907      // Apparently there's more work to do. Let's abort this task. The
2908      // caller will restart it and we can hopefully find more things to do.
2909      set_has_aborted();
2910    }
2911  }
2912
2913  // Mainly for debugging purposes, to make sure that a pointer to the
2914  // closure which was stack-allocated in this frame doesn't
2915  // escape it by accident.
2916  set_cm_oop_closure(NULL);
2917  double end_time_ms = os::elapsedVTime() * 1000.0;
2918  double elapsed_time_ms = end_time_ms - _start_time_ms;
2919  // Update the step history.
2920  _step_times_ms.add(elapsed_time_ms);
2921
2922  if (has_aborted()) {
2923    // The task was aborted for some reason.
2924    if (_has_timed_out) {
2925      double diff_ms = elapsed_time_ms - _time_target_ms;
2926      // Keep statistics of how well we did with respect to hitting
2927      // our target only if we actually timed out (if we aborted for
2928      // other reasons, then the results might get skewed).
2929      _marking_step_diffs_ms.add(diff_ms);
2930    }
2931
2932    if (_cm->has_overflown()) {
2933      // This is the interesting one. We aborted because a global
2934      // overflow was raised. This means we have to restart the
2935      // marking phase and start iterating over regions. However, in
2936      // order to do this we have to make sure that all tasks stop
2937      // what they are doing and re-initialize in a safe manner. We
2938      // will achieve this with the use of two barrier sync points.
2939
2940      if (!is_serial) {
2941        // We only need to enter the sync barrier if being called
2942        // from a parallel context
2943        _cm->enter_first_sync_barrier(_worker_id);
2944
2945        // When we exit this sync barrier we know that all tasks have
2946        // stopped doing marking work. So, it's now safe to
2947        // re-initialize our data structures. At the end of this method,
2948        // task 0 will clear the global data structures.
2949      }
2950
2951      // We clear the local state of this task...
2952      clear_region_fields();
2953
2954      if (!is_serial) {
2955        // ...and enter the second barrier.
2956        _cm->enter_second_sync_barrier(_worker_id);
2957      }
2958      // At this point, if we're in the concurrent phase of
2959      // marking, everything has been re-initialized and we're
2960      // ready to restart.
2961    }
2962  }
2963
2964  _claimed = false;
2965}
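
// A minimal sketch of how a concurrent marking worker might drive the step
// above. The do_marking_step(time_target_ms, do_termination, is_serial)
// calling convention and the G1ConcMarkStepDurationMillis time target are
// assumptions of this sketch, not definitions made in this file:
//
//   if (!_cm->has_aborted()) {
//     task->do_marking_step(G1ConcMarkStepDurationMillis,
//                           true  /* do_termination */,
//                           false /* is_serial */);
//   }
//
// During remark the same step is driven with a very large time target and
// do_termination enabled, so that each task runs to completion rather than
// yielding after a time slice.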
2966
2967G1CMTask::G1CMTask(uint worker_id,
2968                   G1ConcurrentMark* cm,
2969                   G1CMTaskQueue* task_queue,
2970                   G1CMTaskQueueSet* task_queues)
2971  : _g1h(G1CollectedHeap::heap()),
2972    _worker_id(worker_id), _cm(cm),
2973    _objArray_processor(this),
2974    _claimed(false),
2975    _nextMarkBitMap(NULL), _hash_seed(17),
2976    _task_queue(task_queue),
2977    _task_queues(task_queues),
2978    _cm_oop_closure(NULL) {
2979  guarantee(task_queue != NULL, "invariant");
2980  guarantee(task_queues != NULL, "invariant");
2981
2982  _marking_step_diffs_ms.add(0.5);
2983}
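
// A sketch of how these tasks are typically wired up when marking starts;
// max_num_tasks is a placeholder name, and the queue set-up calls are
// assumptions of the sketch:
//
//   for (uint i = 0; i < max_num_tasks; ++i) {
//     G1CMTaskQueue* task_queue = new G1CMTaskQueue();
//     task_queue->initialize();
//     _task_queues->register_queue(i, task_queue);
//     _tasks[i] = new G1CMTask(i, this, task_queue, _task_queues);
//   }
//
// Each task owns exactly one queue but keeps a pointer to the whole queue
// set, which is what allows try_stealing() in do_marking_step() to take
// entries from the other tasks' queues.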
2984
2985// These are formatting macros that are used below to ensure
2986// consistent output. The *_H_* versions are used to format the
2987// header for a particular value and should be kept consistent
2988// with the corresponding value macro. Also note that most of the
2989// macros add the necessary white space (as a prefix), which makes
2990// them a bit easier to compose.
2991
2992// All the output lines are prefixed with this string to be able to
2993// identify them easily in a large log file.
2994#define G1PPRL_LINE_PREFIX            "###"
2995
2996#define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
2997#ifdef _LP64
2998#define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
2999#else // _LP64
3000#define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
3001#endif // _LP64
3002
3003// For per-region info
3004#define G1PPRL_TYPE_FORMAT            "   %-4s"
3005#define G1PPRL_TYPE_H_FORMAT          "   %4s"
3006#define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
3007#define G1PPRL_BYTE_H_FORMAT          "  %9s"
3008#define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
3009#define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
3010
3011// For summary info
3012#define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
3013#define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
3014#define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
3015#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
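
// For illustration, the heap summary line built below,
//
//   G1PPRL_LINE_PREFIX " HEAP"
//   G1PPRL_SUM_ADDR_FORMAT("reserved")
//   G1PPRL_SUM_BYTE_FORMAT("region-size")
//
// concatenates to the single format string
//
//   "### HEAP  reserved: " PTR_FORMAT "-" PTR_FORMAT "  region-size: " SIZE_FORMAT
//
// Every macro contributes its own leading white space, which is why they can
// simply be strung together in the log_trace() calls.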
3016
3017G1PrintRegionLivenessInfoClosure::
3018G1PrintRegionLivenessInfoClosure(const char* phase_name)
3019  : _total_used_bytes(0), _total_capacity_bytes(0),
3020    _total_prev_live_bytes(0), _total_next_live_bytes(0),
3021    _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
3022  G1CollectedHeap* g1h = G1CollectedHeap::heap();
3023  MemRegion g1_reserved = g1h->g1_reserved();
3024  double now = os::elapsedTime();
3025
3026  // Print the header of the output.
3027  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3028  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
3029                          G1PPRL_SUM_ADDR_FORMAT("reserved")
3030                          G1PPRL_SUM_BYTE_FORMAT("region-size"),
3031                          p2i(g1_reserved.start()), p2i(g1_reserved.end()),
3032                          HeapRegion::GrainBytes);
3033  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3034  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3035                          G1PPRL_TYPE_H_FORMAT
3036                          G1PPRL_ADDR_BASE_H_FORMAT
3037                          G1PPRL_BYTE_H_FORMAT
3038                          G1PPRL_BYTE_H_FORMAT
3039                          G1PPRL_BYTE_H_FORMAT
3040                          G1PPRL_DOUBLE_H_FORMAT
3041                          G1PPRL_BYTE_H_FORMAT
3042                          G1PPRL_BYTE_H_FORMAT,
3043                          "type", "address-range",
3044                          "used", "prev-live", "next-live", "gc-eff",
3045                          "remset", "code-roots");
3046  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3047                          G1PPRL_TYPE_H_FORMAT
3048                          G1PPRL_ADDR_BASE_H_FORMAT
3049                          G1PPRL_BYTE_H_FORMAT
3050                          G1PPRL_BYTE_H_FORMAT
3051                          G1PPRL_BYTE_H_FORMAT
3052                          G1PPRL_DOUBLE_H_FORMAT
3053                          G1PPRL_BYTE_H_FORMAT
3054                          G1PPRL_BYTE_H_FORMAT,
3055                          "", "",
3056                          "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
3057                          "(bytes)", "(bytes)");
3058}
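
// The header printed above comes out roughly as follows (addresses, sizes
// and the phase name are illustrative):
//
//   ### PHASE Remark @ 12.345
//   ### HEAP  reserved: 0x00000000e0000000-0x0000000100000000  region-size: 1048576
//   ###
//   ###  type                          address-range       used  prev-live  next-live      gc-eff     remset  code-roots
//   ###                                                  (bytes)    (bytes)    (bytes)  (bytes/ms)    (bytes)     (bytes)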
3059
3060bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
3061  const char* type       = r->get_type_str();
3062  HeapWord* bottom       = r->bottom();
3063  HeapWord* end          = r->end();
3064  size_t capacity_bytes  = r->capacity();
3065  size_t used_bytes      = r->used();
3066  size_t prev_live_bytes = r->live_bytes();
3067  size_t next_live_bytes = r->next_live_bytes();
3068  double gc_eff          = r->gc_efficiency();
3069  size_t remset_bytes    = r->rem_set()->mem_size();
3070  size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
3071
3072  _total_used_bytes      += used_bytes;
3073  _total_capacity_bytes  += capacity_bytes;
3074  _total_prev_live_bytes += prev_live_bytes;
3075  _total_next_live_bytes += next_live_bytes;
3076  _total_remset_bytes    += remset_bytes;
3077  _total_strong_code_roots_bytes += strong_code_roots_bytes;
3078
3079  // Print a line for this particular region.
3080  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3081                          G1PPRL_TYPE_FORMAT
3082                          G1PPRL_ADDR_BASE_FORMAT
3083                          G1PPRL_BYTE_FORMAT
3084                          G1PPRL_BYTE_FORMAT
3085                          G1PPRL_BYTE_FORMAT
3086                          G1PPRL_DOUBLE_FORMAT
3087                          G1PPRL_BYTE_FORMAT
3088                          G1PPRL_BYTE_FORMAT,
3089                          type, p2i(bottom), p2i(end),
3090                          used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
3091                          remset_bytes, strong_code_roots_bytes);
3092
3093  return false;
3094}
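
// Returning false tells the region iteration to continue with the next
// region. A single per-region line looks roughly like this (values are
// illustrative):
//
//   ###   OLD 0x00000000e0000000-0x00000000e0100000    1048576    1037216    1037216           0.0       4216          0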
3095
3096G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3097  // Add static memory usage to the remembered set sizes.
3098  _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
3099  // Print the footer of the output.
3100  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3101  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3102                         " SUMMARY"
3103                         G1PPRL_SUM_MB_FORMAT("capacity")
3104                         G1PPRL_SUM_MB_PERC_FORMAT("used")
3105                         G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
3106                         G1PPRL_SUM_MB_PERC_FORMAT("next-live")
3107                         G1PPRL_SUM_MB_FORMAT("remset")
3108                         G1PPRL_SUM_MB_FORMAT("code-roots"),
3109                         bytes_to_mb(_total_capacity_bytes),
3110                         bytes_to_mb(_total_used_bytes),
3111                         perc(_total_used_bytes, _total_capacity_bytes),
3112                         bytes_to_mb(_total_prev_live_bytes),
3113                         perc(_total_prev_live_bytes, _total_capacity_bytes),
3114                         bytes_to_mb(_total_next_live_bytes),
3115                         perc(_total_next_live_bytes, _total_capacity_bytes),
3116                         bytes_to_mb(_total_remset_bytes),
3117                         bytes_to_mb(_total_strong_code_roots_bytes));
3118}
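
// A minimal usage sketch; the iteration entry point and the phase name are
// assumptions of the sketch:
//
//   G1PrintRegionLivenessInfoClosure cl("Post-Marking");
//   G1CollectedHeap::heap()->heap_region_iterate(&cl);
//
// Constructing the closure prints the header, each doHeapRegion() call
// appends one per-region line, and the destructor above prints the summary
// footer, so the object's lifetime brackets the whole report.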
3119