/*
 * Copyright (c) 2006, 2016, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc/parallel/mutableNUMASpace.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "gc/shared/spaceDecorator.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.hpp"
#include "runtime/thread.inline.hpp"
#include "utilities/align.hpp"

MutableNUMASpace::MutableNUMASpace(size_t alignment) : MutableSpace(alignment), _must_use_large_pages(false) {
  _lgrp_spaces = new (ResourceObj::C_HEAP, mtGC) GrowableArray<LGRPSpace*>(0, true);
  _page_size = os::vm_page_size();
  _adaptation_cycles = 0;
  _samples_count = 0;

#ifdef LINUX
  // Changing the page size can lead to freeing of memory. When using large pages
  // and the memory has been both reserved and committed, Linux does not support
  // freeing parts of it.
  if (UseLargePages && !os::can_commit_large_page_memory()) {
    _must_use_large_pages = true;
  }
#endif // LINUX

  update_layout(true);
}

MutableNUMASpace::~MutableNUMASpace() {
  for (int i = 0; i < lgrp_spaces()->length(); i++) {
    delete lgrp_spaces()->at(i);
  }
  delete lgrp_spaces();
}

#ifndef PRODUCT
void MutableNUMASpace::mangle_unused_area() {
  // This method should do nothing.
  // It can be called on a numa space during a full compaction.
}
void MutableNUMASpace::mangle_unused_area_complete() {
  // This method should do nothing.
  // It can be called on a numa space during a full compaction.
}
void MutableNUMASpace::mangle_region(MemRegion mr) {
  // This method should do nothing because numa spaces are not mangled.
}
void MutableNUMASpace::set_top_for_allocations(HeapWord* v) {
  assert(false, "Do not mangle MutableNUMASpace's");
}
void MutableNUMASpace::set_top_for_allocations() {
  // This method should do nothing.
}
void MutableNUMASpace::check_mangled_unused_area(HeapWord* limit) {
  // This method should do nothing.
}
void MutableNUMASpace::check_mangled_unused_area_complete() {
  // This method should do nothing.
}
#endif  // NOT_PRODUCT

// There may be unallocated holes in the middle chunks
// that should be filled with dead objects to ensure parsability.
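// On systems without static NUMA binding, the filled ranges are also recorded
// as invalid regions so that their pages can be re-placed on the proper node
// during the next layout update.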
void MutableNUMASpace::ensure_parsability() {
  for (int i = 0; i < lgrp_spaces()->length(); i++) {
    LGRPSpace *ls = lgrp_spaces()->at(i);
    MutableSpace *s = ls->space();
    if (s->top() < top()) { // For all spaces preceding the one containing top()
      if (s->free_in_words() > 0) {
        intptr_t cur_top = (intptr_t)s->top();
        size_t words_left_to_fill = pointer_delta(s->end(), s->top());
        while (words_left_to_fill > 0) {
          size_t words_to_fill = MIN2(words_left_to_fill, CollectedHeap::filler_array_max_size());
          assert(words_to_fill >= CollectedHeap::min_fill_size(),
                 "Remaining size (" SIZE_FORMAT ") is too small to fill (based on " SIZE_FORMAT " and " SIZE_FORMAT ")",
                 words_to_fill, words_left_to_fill, CollectedHeap::filler_array_max_size());
          CollectedHeap::fill_with_object((HeapWord*)cur_top, words_to_fill);
          if (!os::numa_has_static_binding()) {
            size_t touched_words = words_to_fill;
#ifndef ASSERT
            if (!ZapUnusedHeapArea) {
              touched_words = MIN2((size_t)align_object_size(typeArrayOopDesc::header_size(T_INT)),
                touched_words);
            }
#endif
            MemRegion invalid;
            HeapWord *crossing_start = align_up((HeapWord*)cur_top, os::vm_page_size());
            HeapWord *crossing_end = align_down((HeapWord*)(cur_top + touched_words), os::vm_page_size());
            if (crossing_start != crossing_end) {
              // If the object header crossed a small page boundary we mark the area
              // as invalid, rounding it to page_size().
              HeapWord *start = MAX2(align_down((HeapWord*)cur_top, page_size()), s->bottom());
              HeapWord *end = MIN2(align_up((HeapWord*)(cur_top + touched_words), page_size()), s->end());
              invalid = MemRegion(start, end);
            }

            ls->add_invalid_region(invalid);
          }
          cur_top = cur_top + (words_to_fill * HeapWordSize);
          words_left_to_fill -= words_to_fill;
        }
      }
    } else {
      if (!os::numa_has_static_binding()) {
#ifdef ASSERT
        MemRegion invalid(s->top(), s->end());
        ls->add_invalid_region(invalid);
#else
        if (ZapUnusedHeapArea) {
          MemRegion invalid(s->top(), s->end());
          ls->add_invalid_region(invalid);
        } else {
          return;
        }
#endif
      } else {
          return;
      }
    }
  }
}

size_t MutableNUMASpace::used_in_words() const {
  size_t s = 0;
  for (int i = 0; i < lgrp_spaces()->length(); i++) {
    s += lgrp_spaces()->at(i)->space()->used_in_words();
  }
  return s;
}

size_t MutableNUMASpace::free_in_words() const {
  size_t s = 0;
  for (int i = 0; i < lgrp_spaces()->length(); i++) {
    s += lgrp_spaces()->at(i)->space()->free_in_words();
  }
  return s;
}


size_t MutableNUMASpace::tlab_capacity(Thread *thr) const {
  guarantee(thr != NULL, "No thread");
  int lgrp_id = thr->lgrp_id();
  if (lgrp_id == -1) {
    // This case can occur after the topology of the system has
    // changed. Threads can change their location; the new home
    // group will be determined during the first allocation
    // attempt. For now we can safely assume that all spaces
    // have equal size because the whole space will be reinitialized.
    if (lgrp_spaces()->length() > 0) {
      return capacity_in_bytes() / lgrp_spaces()->length();
    } else {
      assert(false, "There should be at least one locality group");
      return 0;
    }
  }
  // That's the normal case, where we know the locality group of the thread.
  int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);
  if (i == -1) {
    return 0;
  }
  return lgrp_spaces()->at(i)->space()->capacity_in_bytes();
}
size_t MutableNUMASpace::tlab_used(Thread *thr) const {
  // Please see the comments for tlab_capacity().
  guarantee(thr != NULL, "No thread");
  int lgrp_id = thr->lgrp_id();
  if (lgrp_id == -1) {
    if (lgrp_spaces()->length() > 0) {
      return (used_in_bytes()) / lgrp_spaces()->length();
    } else {
      assert(false, "There should be at least one locality group");
      return 0;
    }
  }
  int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);
  if (i == -1) {
    return 0;
  }
  return lgrp_spaces()->at(i)->space()->used_in_bytes();
}


size_t MutableNUMASpace::unsafe_max_tlab_alloc(Thread *thr) const {
  // Please see the comments for tlab_capacity().
  guarantee(thr != NULL, "No thread");
  int lgrp_id = thr->lgrp_id();
  if (lgrp_id == -1) {
    if (lgrp_spaces()->length() > 0) {
      return free_in_bytes() / lgrp_spaces()->length();
    } else {
      assert(false, "There should be at least one locality group");
      return 0;
    }
  }
  int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);
  if (i == -1) {
    return 0;
  }
  return lgrp_spaces()->at(i)->space()->free_in_bytes();
}


size_t MutableNUMASpace::capacity_in_words(Thread* thr) const {
  guarantee(thr != NULL, "No thread");
  int lgrp_id = thr->lgrp_id();
  if (lgrp_id == -1) {
    if (lgrp_spaces()->length() > 0) {
      return capacity_in_words() / lgrp_spaces()->length();
    } else {
      assert(false, "There should be at least one locality group");
      return 0;
    }
  }
  int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);
  if (i == -1) {
    return 0;
  }
  return lgrp_spaces()->at(i)->space()->capacity_in_words();
}

// Check if the NUMA topology has changed. Add and remove spaces if needed.
// The update can be forced by setting the force parameter equal to true.
bool MutableNUMASpace::update_layout(bool force) {
  // Check if the topology has changed.
  bool changed = os::numa_topology_changed();
  if (force || changed) {
    // Compute lgrp intersection. Add/remove spaces.
    int lgrp_limit = (int)os::numa_get_groups_num();
    int *lgrp_ids = NEW_C_HEAP_ARRAY(int, lgrp_limit, mtGC);
    int lgrp_num = (int)os::numa_get_leaf_groups(lgrp_ids, lgrp_limit);
    assert(lgrp_num > 0, "There should be at least one locality group");
    // Add new spaces for the new nodes
    for (int i = 0; i < lgrp_num; i++) {
      bool found = false;
      for (int j = 0; j < lgrp_spaces()->length(); j++) {
        if (lgrp_spaces()->at(j)->lgrp_id() == lgrp_ids[i]) {
          found = true;
          break;
        }
      }
      if (!found) {
        lgrp_spaces()->append(new LGRPSpace(lgrp_ids[i], alignment()));
      }
    }

    // Remove spaces for the removed nodes.
    for (int i = 0; i < lgrp_spaces()->length();) {
      bool found = false;
      for (int j = 0; j < lgrp_num; j++) {
        if (lgrp_spaces()->at(i)->lgrp_id() == lgrp_ids[j]) {
          found = true;
          break;
        }
      }
      if (!found) {
        delete lgrp_spaces()->at(i);
        lgrp_spaces()->remove_at(i);
      } else {
        i++;
      }
    }

    FREE_C_HEAP_ARRAY(int, lgrp_ids);

    if (changed) {
      for (JavaThread *thread = Threads::first(); thread; thread = thread->next()) {
        thread->set_lgrp_id(-1);
      }
    }
    return true;
  }
  return false;
}

// Bias region towards the first-touching lgrp. Set the right page sizes.
void MutableNUMASpace::bias_region(MemRegion mr, int lgrp_id) {
  HeapWord *start = align_up(mr.start(), page_size());
  HeapWord *end = align_down(mr.end(), page_size());
  if (end > start) {
    MemRegion aligned_region(start, end);
    assert((intptr_t)aligned_region.start()     % page_size() == 0 &&
           (intptr_t)aligned_region.byte_size() % page_size() == 0, "Bad alignment");
    assert(region().contains(aligned_region), "Sanity");
    // First we tell the OS which page size we want in the given range. The underlying
    // large page can be broken down if we require small pages.
    os::realign_memory((char*)aligned_region.start(), aligned_region.byte_size(), page_size());
    // Then we uncommit the pages in the range.
    os::free_memory((char*)aligned_region.start(), aligned_region.byte_size(), page_size());
    // And make them local/first-touch biased.
    os::numa_make_local((char*)aligned_region.start(), aligned_region.byte_size(), lgrp_id);
  }
}

// Free all pages in the region.
void MutableNUMASpace::free_region(MemRegion mr) {
  HeapWord *start = align_up(mr.start(), page_size());
  HeapWord *end = align_down(mr.end(), page_size());
  if (end > start) {
    MemRegion aligned_region(start, end);
    assert((intptr_t)aligned_region.start()     % page_size() == 0 &&
           (intptr_t)aligned_region.byte_size() % page_size() == 0, "Bad alignment");
    assert(region().contains(aligned_region), "Sanity");
    os::free_memory((char*)aligned_region.start(), aligned_region.byte_size(), page_size());
  }
}

// Update space layout. Perform adaptation.
void MutableNUMASpace::update() {
  if (update_layout(false)) {
    // If the topology has changed, make all chunks zero-sized
    // and clear the alloc-rate statistics.
    // In the future we may want to handle this more gracefully
    // to avoid reallocating the pages as much as possible.
    for (int i = 0; i < lgrp_spaces()->length(); i++) {
      LGRPSpace *ls = lgrp_spaces()->at(i);
      MutableSpace *s = ls->space();
      s->set_end(s->bottom());
      s->set_top(s->bottom());
      ls->clear_alloc_rate();
    }
    // A NUMA space is never mangled
    initialize(region(),
               SpaceDecorator::Clear,
               SpaceDecorator::DontMangle);
  } else {
    bool should_initialize = false;
    if (!os::numa_has_static_binding()) {
      for (int i = 0; i < lgrp_spaces()->length(); i++) {
        if (!lgrp_spaces()->at(i)->invalid_region().is_empty()) {
          should_initialize = true;
          break;
        }
      }
    }

    if (should_initialize ||
        (UseAdaptiveNUMAChunkSizing && adaptation_cycles() < samples_count())) {
      // A NUMA space is never mangled
      initialize(region(),
                 SpaceDecorator::Clear,
                 SpaceDecorator::DontMangle);
    }
  }

  if (NUMAStats) {
    for (int i = 0; i < lgrp_spaces()->length(); i++) {
      lgrp_spaces()->at(i)->accumulate_statistics(page_size());
    }
  }

  scan_pages(NUMAPageScanRate);
}

// Scan pages. Free pages that have the wrong size or placement.
void MutableNUMASpace::scan_pages(size_t page_count)
{
  size_t pages_per_chunk = page_count / lgrp_spaces()->length();
  if (pages_per_chunk > 0) {
    for (int i = 0; i < lgrp_spaces()->length(); i++) {
      LGRPSpace *ls = lgrp_spaces()->at(i);
      ls->scan_pages(page_size(), pages_per_chunk);
    }
  }
}

// Accumulate statistics about the allocation rate of each lgrp.
void MutableNUMASpace::accumulate_statistics() {
  if (UseAdaptiveNUMAChunkSizing) {
    for (int i = 0; i < lgrp_spaces()->length(); i++) {
      lgrp_spaces()->at(i)->sample();
    }
    increment_samples_count();
  }

  if (NUMAStats) {
    for (int i = 0; i < lgrp_spaces()->length(); i++) {
      lgrp_spaces()->at(i)->accumulate_statistics(page_size());
    }
  }
}

// Get the current size of a chunk.
// This function computes the size of the chunk based on the
// difference between chunk ends. This allows it to work correctly in
// case the whole space is resized and during the process of adaptive
// chunk resizing.
size_t MutableNUMASpace::current_chunk_size(int i) {
  HeapWord *cur_end, *prev_end;
  if (i == 0) {
    prev_end = bottom();
  } else {
    prev_end = lgrp_spaces()->at(i - 1)->space()->end();
  }
  if (i == lgrp_spaces()->length() - 1) {
    cur_end = end();
  } else {
    cur_end = lgrp_spaces()->at(i)->space()->end();
  }
  if (cur_end > prev_end) {
    return pointer_delta(cur_end, prev_end, sizeof(char));
  }
  return 0;
}

// Return the default chunk size by equally dividing the space.
// page_size() aligned.
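// For example, with base_space_size() == 1024 pages and four locality groups,
// each chunk defaults to 256 pages, i.e. 256 * page_size() bytes.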
size_t MutableNUMASpace::default_chunk_size() {
  return base_space_size() / lgrp_spaces()->length() * page_size();
}

// Produce a new chunk size. page_size() aligned.
// This function is expected to be called on a sequence of i's from 0 to
// lgrp_spaces()->length().
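// Chunk i receives a share of the pages not yet claimed by chunks 0 .. i-1 that
// is proportional to its group's allocation rate relative to the remaining
// groups, optionally bounded by 'limit' to throttle how fast the layout changes.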
size_t MutableNUMASpace::adaptive_chunk_size(int i, size_t limit) {
  size_t pages_available = base_space_size();
  for (int j = 0; j < i; j++) {
    pages_available -= align_down(current_chunk_size(j), page_size()) / page_size();
  }
  pages_available -= lgrp_spaces()->length() - i - 1;
  assert(pages_available > 0, "No pages left");
  float alloc_rate = 0;
  for (int j = i; j < lgrp_spaces()->length(); j++) {
    alloc_rate += lgrp_spaces()->at(j)->alloc_rate()->average();
  }
  size_t chunk_size = 0;
  if (alloc_rate > 0) {
    LGRPSpace *ls = lgrp_spaces()->at(i);
    chunk_size = (size_t)(ls->alloc_rate()->average() / alloc_rate * pages_available) * page_size();
  }
  chunk_size = MAX2(chunk_size, page_size());

  if (limit > 0) {
    limit = align_down(limit, page_size());
    if (chunk_size > current_chunk_size(i)) {
      size_t upper_bound = pages_available * page_size();
      if (upper_bound > limit &&
          current_chunk_size(i) < upper_bound - limit) {
        // The resulting upper bound should not exceed the available
        // amount of memory (pages_available * page_size()).
        upper_bound = current_chunk_size(i) + limit;
      }
      chunk_size = MIN2(chunk_size, upper_bound);
    } else {
      size_t lower_bound = page_size();
      if (current_chunk_size(i) > limit) { // lower_bound shouldn't underflow.
        lower_bound = current_chunk_size(i) - limit;
      }
      chunk_size = MAX2(chunk_size, lower_bound);
    }
  }
  assert(chunk_size <= pages_available * page_size(), "Chunk size out of range");
  return chunk_size;
}


// Return the bottom_region and the top_region. Align them to page_size() boundary.
// |------------------new_region---------------------------------|
// |----bottom_region--|---intersection---|------top_region------|
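// The tails are the page_size()-aligned parts of new_region that fall outside
// the intersection; the caller frees or re-biases their pages, while the pages
// inside the intersection stay in place.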
void MutableNUMASpace::select_tails(MemRegion new_region, MemRegion intersection,
                                    MemRegion* bottom_region, MemRegion *top_region) {
  // Is there bottom?
  if (new_region.start() < intersection.start()) { // Yes
    // Try to coalesce small pages into a large one.
    if (UseLargePages && page_size() >= alignment()) {
      HeapWord* p = align_up(intersection.start(), alignment());
      if (new_region.contains(p)
          && pointer_delta(p, new_region.start(), sizeof(char)) >= alignment()) {
        if (intersection.contains(p)) {
          intersection = MemRegion(p, intersection.end());
        } else {
          intersection = MemRegion(p, p);
        }
      }
    }
    *bottom_region = MemRegion(new_region.start(), intersection.start());
  } else {
    *bottom_region = MemRegion();
  }

  // Is there top?
  if (intersection.end() < new_region.end()) { // Yes
    // Try to coalesce small pages into a large one.
    if (UseLargePages && page_size() >= alignment()) {
      HeapWord* p = align_down(intersection.end(), alignment());
      if (new_region.contains(p)
          && pointer_delta(new_region.end(), p, sizeof(char)) >= alignment()) {
        if (intersection.contains(p)) {
          intersection = MemRegion(intersection.start(), p);
        } else {
          intersection = MemRegion(p, p);
        }
      }
    }
    *top_region = MemRegion(intersection.end(), new_region.end());
  } else {
    *top_region = MemRegion();
  }
}

// Try to merge the invalid region with the bottom or top region by decreasing
// the intersection area. Return a non-empty invalid_region, aligned to the
// page_size() boundary, only if it lies entirely inside the intersection.
// |------------------new_region---------------------------------|
// |----------------|-------invalid---|--------------------------|
// |----bottom_region--|---intersection---|------top_region------|
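// The caller is expected to free the returned invalid_region so that its pages
// can later be committed again on the proper node.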
void MutableNUMASpace::merge_regions(MemRegion new_region, MemRegion* intersection,
                                     MemRegion *invalid_region) {
  if (intersection->start() >= invalid_region->start() && intersection->contains(invalid_region->end())) {
    *intersection = MemRegion(invalid_region->end(), intersection->end());
    *invalid_region = MemRegion();
  } else
    if (intersection->end() <= invalid_region->end() && intersection->contains(invalid_region->start())) {
      *intersection = MemRegion(intersection->start(), invalid_region->start());
      *invalid_region = MemRegion();
    } else
      if (intersection->equals(*invalid_region) || invalid_region->contains(*intersection)) {
        *intersection = MemRegion(new_region.start(), new_region.start());
        *invalid_region = MemRegion();
      } else
        if (intersection->contains(invalid_region)) {
            // That's the only case we have to make an additional bias_region() call.
            HeapWord* start = invalid_region->start();
            HeapWord* end = invalid_region->end();
            if (UseLargePages && page_size() >= alignment()) {
              HeapWord *p = align_down(start, alignment());
              if (new_region.contains(p)) {
                start = p;
              }
              p = align_up(end, alignment());
              if (new_region.contains(end)) {
                end = p;
              }
            }
            if (intersection->start() > start) {
              *intersection = MemRegion(start, intersection->end());
            }
            if (intersection->end() < end) {
              *intersection = MemRegion(intersection->start(), end);
            }
            *invalid_region = MemRegion(start, end);
        }
}

void MutableNUMASpace::initialize(MemRegion mr,
                                  bool clear_space,
                                  bool mangle_space,
                                  bool setup_pages) {
  assert(clear_space, "Reallocation will destroy data!");
  assert(lgrp_spaces()->length() > 0, "There should be at least one space");

  MemRegion old_region = region(), new_region;
  set_bottom(mr.start());
  set_end(mr.end());
  // Must always clear the space
  clear(SpaceDecorator::DontMangle);

  // Compute chunk sizes
  size_t prev_page_size = page_size();
  set_page_size(UseLargePages ? alignment() : os::vm_page_size());
  HeapWord* rounded_bottom = align_up(bottom(), page_size());
  HeapWord* rounded_end = align_down(end(), page_size());
  size_t base_space_size_pages = pointer_delta(rounded_end, rounded_bottom, sizeof(char)) / page_size();

  // Try small pages if the chunk size is too small
  if (base_space_size_pages / lgrp_spaces()->length() == 0
      && page_size() > (size_t)os::vm_page_size()) {
    // Changing the page size below can lead to freeing of memory, so we fail initialization.
    if (_must_use_large_pages) {
      vm_exit_during_initialization("Failed initializing NUMA with large pages. Too small heap size");
    }
    set_page_size(os::vm_page_size());
    rounded_bottom = align_up(bottom(), page_size());
    rounded_end = align_down(end(), page_size());
    base_space_size_pages = pointer_delta(rounded_end, rounded_bottom, sizeof(char)) / page_size();
  }
  guarantee(base_space_size_pages / lgrp_spaces()->length() > 0, "Space too small");
  set_base_space_size(base_space_size_pages);

  // Handle space resize
  MemRegion top_region, bottom_region;
  if (!old_region.equals(region())) {
    new_region = MemRegion(rounded_bottom, rounded_end);
    MemRegion intersection = new_region.intersection(old_region);
    if (intersection.start() == NULL ||
        intersection.end() == NULL   ||
        prev_page_size > page_size()) { // If the page size got smaller we have to change
                                        // the page size preference for the whole space.
      intersection = MemRegion(new_region.start(), new_region.start());
    }
    select_tails(new_region, intersection, &bottom_region, &top_region);
    bias_region(bottom_region, lgrp_spaces()->at(0)->lgrp_id());
    bias_region(top_region, lgrp_spaces()->at(lgrp_spaces()->length() - 1)->lgrp_id());
  }

  // Check if the space layout has changed significantly.
  // This happens when the space has been resized so that either the head or
  // the tail chunk became less than a page.
  bool layout_valid = UseAdaptiveNUMAChunkSizing          &&
                      current_chunk_size(0) > page_size() &&
                      current_chunk_size(lgrp_spaces()->length() - 1) > page_size();


  for (int i = 0; i < lgrp_spaces()->length(); i++) {
    LGRPSpace *ls = lgrp_spaces()->at(i);
    MutableSpace *s = ls->space();
    old_region = s->region();

    size_t chunk_byte_size = 0, old_chunk_byte_size = 0;
    if (i < lgrp_spaces()->length() - 1) {
      if (!UseAdaptiveNUMAChunkSizing                                ||
          (UseAdaptiveNUMAChunkSizing && NUMAChunkResizeWeight == 0) ||
           samples_count() < AdaptiveSizePolicyReadyThreshold) {
        // No adaptation. Divide the space equally.
        chunk_byte_size = default_chunk_size();
      } else
        if (!layout_valid || NUMASpaceResizeRate == 0) {
          // Fast adaptation. If no space resize rate is set, resize
          // the chunks instantly.
          chunk_byte_size = adaptive_chunk_size(i, 0);
        } else {
          // Slow adaptation. Resize the chunks moving no more than
          // NUMASpaceResizeRate bytes per collection.
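          // Note: limit is NUMASpaceResizeRate / (n * (n + 1) / 2), so the
          // per-chunk budgets limit * (i + 1) add up to NUMASpaceResizeRate
          // across all chunks, keeping the total boundary movement per
          // collection within the configured rate.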
          size_t limit = NUMASpaceResizeRate /
                         (lgrp_spaces()->length() * (lgrp_spaces()->length() + 1) / 2);
          chunk_byte_size = adaptive_chunk_size(i, MAX2(limit * (i + 1), page_size()));
        }

      assert(chunk_byte_size >= page_size(), "Chunk size too small");
      assert(chunk_byte_size <= capacity_in_bytes(), "Sanity check");
    }

    if (i == 0) { // Bottom chunk
      if (i != lgrp_spaces()->length() - 1) {
        new_region = MemRegion(bottom(), rounded_bottom + (chunk_byte_size >> LogHeapWordSize));
      } else {
        new_region = MemRegion(bottom(), end());
      }
    } else
      if (i < lgrp_spaces()->length() - 1) { // Middle chunks
        MutableSpace *ps = lgrp_spaces()->at(i - 1)->space();
        new_region = MemRegion(ps->end(),
                               ps->end() + (chunk_byte_size >> LogHeapWordSize));
      } else { // Top chunk
        MutableSpace *ps = lgrp_spaces()->at(i - 1)->space();
        new_region = MemRegion(ps->end(), end());
      }
    guarantee(region().contains(new_region), "Region invariant");


    // The general case:
    // |---------------------|--invalid---|--------------------------|
    // |------------------new_region---------------------------------|
    // |----bottom_region--|---intersection---|------top_region------|
    //                     |----old_region----|
    // The intersection part has all pages in place; we don't need to migrate them.
    // Pages for the top and bottom part should be freed and then reallocated.

    MemRegion intersection = old_region.intersection(new_region);

    if (intersection.start() == NULL || intersection.end() == NULL) {
      intersection = MemRegion(new_region.start(), new_region.start());
    }

    if (!os::numa_has_static_binding()) {
      MemRegion invalid_region = ls->invalid_region().intersection(new_region);
      // The invalid region is a range of memory that could have been allocated on
      // another node. That's relevant only on Solaris, where there is no static
      // memory binding.
      if (!invalid_region.is_empty()) {
        merge_regions(new_region, &intersection, &invalid_region);
        free_region(invalid_region);
        ls->set_invalid_region(MemRegion());
      }
    }

    select_tails(new_region, intersection, &bottom_region, &top_region);

    if (!os::numa_has_static_binding()) {
      // If that's a system with the first-touch policy then it's enough
      // to free the pages.
      free_region(bottom_region);
      free_region(top_region);
    } else {
      // In a system with static binding we have to change the bias whenever
      // we reshape the heap.
      bias_region(bottom_region, ls->lgrp_id());
      bias_region(top_region, ls->lgrp_id());
    }

    // Clear space (set top = bottom) but never mangle.
    s->initialize(new_region, SpaceDecorator::Clear, SpaceDecorator::DontMangle, MutableSpace::DontSetupPages);

    set_adaptation_cycles(samples_count());
  }
}

// Set the top of the whole space.
// Mark the holes in chunks below the top() as invalid.
void MutableNUMASpace::set_top(HeapWord* value) {
  bool found_top = false;
  for (int i = 0; i < lgrp_spaces()->length();) {
    LGRPSpace *ls = lgrp_spaces()->at(i);
    MutableSpace *s = ls->space();
    HeapWord *top = MAX2(align_down(s->top(), page_size()), s->bottom());

    if (s->contains(value)) {
      // Check if setting the chunk's top to the given value would create a hole
      // smaller than a minimal object; if this is the last chunk we don't care.
      if (i < lgrp_spaces()->length() - 1) {
        size_t remainder = pointer_delta(s->end(), value);
        const size_t min_fill_size = CollectedHeap::min_fill_size();
        if (remainder < min_fill_size && remainder > 0) {
          // Add a minimum size filler object; it will cross the chunk boundary.
          CollectedHeap::fill_with_object(value, min_fill_size);
          value += min_fill_size;
          assert(!s->contains(value), "Should be in the next chunk");
          // Restart the loop from the same chunk, since the value has moved
          // to the next one.
          continue;
        }
      }

      if (!os::numa_has_static_binding() && top < value && top < s->end()) {
        ls->add_invalid_region(MemRegion(top, value));
      }
      s->set_top(value);
      found_top = true;
    } else {
      if (found_top) {
        s->set_top(s->bottom());
      } else {
        if (!os::numa_has_static_binding() && top < s->end()) {
          ls->add_invalid_region(MemRegion(top, s->end()));
        }
        s->set_top(s->end());
      }
    }
    i++;
  }
  MutableSpace::set_top(value);
}

void MutableNUMASpace::clear(bool mangle_space) {
  MutableSpace::set_top(bottom());
  for (int i = 0; i < lgrp_spaces()->length(); i++) {
    // Never mangle NUMA spaces because the mangling will
    // bind the memory to a possibly unwanted lgroup.
    lgrp_spaces()->at(i)->space()->clear(SpaceDecorator::DontMangle);
  }
}

/*
   Linux supports static memory binding, so most of the logic dealing with
   possibly invalid page allocation is effectively disabled. There is also no
   notion of a home node in Linux: a thread is allowed to migrate freely,
   although the scheduler is rather reluctant to move threads between nodes.
   We check the current node on every allocation, and with high probability a
   thread stays on the same node for some time, allowing local access to
   recently allocated objects.
 */

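// Allocate an object in the chunk of the current thread's home locality group.
// If the group has no chunk yet (e.g. right after a CPU has been hotplugged),
// fall back to a randomly chosen chunk.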
HeapWord* MutableNUMASpace::allocate(size_t size) {
  Thread* thr = Thread::current();
  int lgrp_id = thr->lgrp_id();
  if (lgrp_id == -1 || !os::numa_has_group_homing()) {
    lgrp_id = os::numa_get_group_id();
    thr->set_lgrp_id(lgrp_id);
  }

  int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);

  // It is possible that a new CPU has been hotplugged and
  // we haven't reshaped the space accordingly.
  if (i == -1) {
    i = os::random() % lgrp_spaces()->length();
  }

  LGRPSpace* ls = lgrp_spaces()->at(i);
  MutableSpace *s = ls->space();
  HeapWord *p = s->allocate(size);

  if (p != NULL) {
    size_t remainder = s->free_in_words();
    if (remainder < CollectedHeap::min_fill_size() && remainder > 0) {
      s->set_top(s->top() - size);
      p = NULL;
    }
  }
  if (p != NULL) {
    if (top() < s->top()) { // Keep _top updated.
      MutableSpace::set_top(s->top());
    }
  }
  // Make the page allocation happen here if there is no static binding.
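  // Touching one word per small page is enough for the OS to place the pages
  // on the allocating thread's node (first touch).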
  if (p != NULL && !os::numa_has_static_binding()) {
    for (HeapWord *i = p; i < p + size; i += os::vm_page_size() >> LogHeapWordSize) {
      *(int*)i = 0;
    }
  }
  if (p == NULL) {
    ls->set_allocation_failed();
  }
  return p;
}

// This version is lock-free.
HeapWord* MutableNUMASpace::cas_allocate(size_t size) {
  Thread* thr = Thread::current();
  int lgrp_id = thr->lgrp_id();
  if (lgrp_id == -1 || !os::numa_has_group_homing()) {
    lgrp_id = os::numa_get_group_id();
    thr->set_lgrp_id(lgrp_id);
  }

  int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);
  // It is possible that a new CPU has been hotplugged and
  // we haven't reshaped the space accordingly.
  if (i == -1) {
    i = os::random() % lgrp_spaces()->length();
  }
  LGRPSpace *ls = lgrp_spaces()->at(i);
  MutableSpace *s = ls->space();
  HeapWord *p = s->cas_allocate(size);
  if (p != NULL) {
    size_t remainder = pointer_delta(s->end(), p + size);
    if (remainder < CollectedHeap::min_fill_size() && remainder > 0) {
      if (s->cas_deallocate(p, size)) {
        // We were the last to allocate and created a fragment less than
        // a minimal object.
        p = NULL;
      } else {
        guarantee(false, "Deallocation should always succeed");
      }
    }
  }
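  // Publish the new top with a CAS loop: only ever advance the global top,
  // since another thread may have already moved it past our chunk's top.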
  if (p != NULL) {
    HeapWord* cur_top, *cur_chunk_top = p + size;
    while ((cur_top = top()) < cur_chunk_top) { // Keep _top updated.
      if (Atomic::cmpxchg_ptr(cur_chunk_top, top_addr(), cur_top) == cur_top) {
        break;
      }
    }
  }

  // Make the page allocation happen here if there is no static binding.
  if (p != NULL && !os::numa_has_static_binding()) {
    for (HeapWord *i = p; i < p + size; i += os::vm_page_size() >> LogHeapWordSize) {
      *(int*)i = 0;
    }
  }
  if (p == NULL) {
    ls->set_allocation_failed();
  }
  return p;
}

void MutableNUMASpace::print_short_on(outputStream* st) const {
  MutableSpace::print_short_on(st);
  st->print(" (");
  for (int i = 0; i < lgrp_spaces()->length(); i++) {
    st->print("lgrp %d: ", lgrp_spaces()->at(i)->lgrp_id());
    lgrp_spaces()->at(i)->space()->print_short_on(st);
    if (i < lgrp_spaces()->length() - 1) {
      st->print(", ");
    }
  }
  st->print(")");
}

void MutableNUMASpace::print_on(outputStream* st) const {
  MutableSpace::print_on(st);
  for (int i = 0; i < lgrp_spaces()->length(); i++) {
    LGRPSpace *ls = lgrp_spaces()->at(i);
    st->print("    lgrp %d", ls->lgrp_id());
    ls->space()->print_on(st);
    if (NUMAStats) {
      for (int i = 0; i < lgrp_spaces()->length(); i++) {
        lgrp_spaces()->at(i)->accumulate_statistics(page_size());
      }
      st->print("    local/remote/unbiased/uncommitted: " SIZE_FORMAT "K/"
                SIZE_FORMAT "K/" SIZE_FORMAT "K/" SIZE_FORMAT
                "K, large/small pages: " SIZE_FORMAT "/" SIZE_FORMAT "\n",
                ls->space_stats()->_local_space / K,
                ls->space_stats()->_remote_space / K,
                ls->space_stats()->_unbiased_space / K,
                ls->space_stats()->_uncommited_space / K,
                ls->space_stats()->_large_pages,
                ls->space_stats()->_small_pages);
    }
  }
}

void MutableNUMASpace::verify() {
  // This can be called after setting an arbitrary value to the space's top,
  // so an object can cross the chunk boundary. We ensure the parsability
  // of the space and just walk the objects in linear fashion.
  ensure_parsability();
  MutableSpace::verify();
}

// Scan pages and gather stats about page placement and size.
void MutableNUMASpace::LGRPSpace::accumulate_statistics(size_t page_size) {
  clear_space_stats();
  char *start = (char*)align_up(space()->bottom(), page_size);
  char* end = (char*)align_down(space()->end(), page_size);
  if (start < end) {
    for (char *p = start; p < end;) {
      os::page_info info;
      if (os::get_page_info(p, &info)) {
        if (info.size > 0) {
          if (info.size > (size_t)os::vm_page_size()) {
            space_stats()->_large_pages++;
          } else {
            space_stats()->_small_pages++;
          }
          if (info.lgrp_id == lgrp_id()) {
            space_stats()->_local_space += info.size;
          } else {
            space_stats()->_remote_space += info.size;
          }
          p += info.size;
        } else {
          p += os::vm_page_size();
          space_stats()->_uncommited_space += os::vm_page_size();
        }
      } else {
        return;
      }
    }
  }
  space_stats()->_unbiased_space = pointer_delta(start, space()->bottom(), sizeof(char)) +
                                   pointer_delta(space()->end(), end, sizeof(char));

}

// Scan page_count pages and verify if they have the right size and right placement.
// If invalid pages are found they are freed in the hope that subsequent reallocation
// will be more successful.
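// The scan resumes where the previous call left off, so successive calls sweep
// the whole space one page_count-sized window at a time.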
void MutableNUMASpace::LGRPSpace::scan_pages(size_t page_size, size_t page_count)
{
  char* range_start = (char*)align_up(space()->bottom(), page_size);
  char* range_end = (char*)align_down(space()->end(), page_size);

  if (range_start > last_page_scanned() || last_page_scanned() >= range_end) {
    set_last_page_scanned(range_start);
  }

  char *scan_start = last_page_scanned();
  char* scan_end = MIN2(scan_start + page_size * page_count, range_end);

  os::page_info page_expected, page_found;
  page_expected.size = page_size;
  page_expected.lgrp_id = lgrp_id();

  char *s = scan_start;
  while (s < scan_end) {
    char *e = os::scan_pages(s, (char*)scan_end, &page_expected, &page_found);
    if (e == NULL) {
      break;
    }
    if (e != scan_end) {
      assert(e < scan_end, "e: " PTR_FORMAT " scan_end: " PTR_FORMAT, p2i(e), p2i(scan_end));

      if ((page_expected.size != page_size || page_expected.lgrp_id != lgrp_id())
          && page_expected.size != 0) {
        os::free_memory(s, pointer_delta(e, s, sizeof(char)), page_size);
      }
      page_expected = page_found;
    }
    s = e;
  }

  set_last_page_scanned(scan_end);
}