vm_version_ppc.cpp revision 6683:08a2164660fb
1/*
2 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
3 * Copyright 2012, 2014 SAP AG. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26#include "precompiled.hpp"
27#include "asm/assembler.inline.hpp"
28#include "asm/macroAssembler.inline.hpp"
29#include "compiler/disassembler.hpp"
30#include "memory/resourceArea.hpp"
31#include "runtime/java.hpp"
32#include "runtime/stubCodeGenerator.hpp"
33#include "utilities/defaultStream.hpp"
34#include "vm_version_ppc.hpp"
35
36# include <sys/sysinfo.h>
37
38int VM_Version::_features = VM_Version::unknown_m;
39int VM_Version::_measured_cache_line_size = 128; // default value
40const char* VM_Version::_features_str = "";
41bool VM_Version::_is_determine_features_test_running = false;
42
43
// Prints a warning on the error stream when the trap-based option 'flag' is
// switched on and is not at its default, i.e. the option is about to be
// disabled because UseSIGTRAP is off.
//
// The body is wrapped in do { ... } while (0) so that 'MSG(flag);' behaves
// like a single statement everywhere, including an un-braced if/else, and
// cannot capture a following 'else'.
#define MSG(flag)                                                        \
  do {                                                                   \
    if (flag && !FLAG_IS_DEFAULT(flag)) {                                \
      jio_fprintf(defaultStream::error_stream(),                         \
                  "warning: -XX:+" #flag " requires -XX:+UseSIGTRAP\n"   \
                  "         -XX:+" #flag " will be disabled!\n");        \
    }                                                                    \
  } while (0)
49
50void VM_Version::initialize() {
51
52  // Test which instructions are supported and measure cache line size.
53  determine_features();
54
55  // If PowerArchitecturePPC64 hasn't been specified explicitly determine from features.
56  if (FLAG_IS_DEFAULT(PowerArchitecturePPC64)) {
57    if (VM_Version::has_popcntw()) {
58      FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 7);
59    } else if (VM_Version::has_cmpb()) {
60      FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 6);
61    } else if (VM_Version::has_popcntb()) {
62      FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 5);
63    } else {
64      FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 0);
65    }
66  }
67  guarantee(PowerArchitecturePPC64 == 0 || PowerArchitecturePPC64 == 5 ||
68            PowerArchitecturePPC64 == 6 || PowerArchitecturePPC64 == 7,
69            "PowerArchitecturePPC64 should be 0, 5, 6 or 7");
70
71  if (!UseSIGTRAP) {
72    MSG(TrapBasedICMissChecks);
73    MSG(TrapBasedNotEntrantChecks);
74    MSG(TrapBasedNullChecks);
75    FLAG_SET_ERGO(bool, TrapBasedNotEntrantChecks, false);
76    FLAG_SET_ERGO(bool, TrapBasedNullChecks,       false);
77    FLAG_SET_ERGO(bool, TrapBasedICMissChecks,     false);
78  }
79
80#ifdef COMPILER2
81  if (!UseSIGTRAP) {
82    MSG(TrapBasedRangeChecks);
83    FLAG_SET_ERGO(bool, TrapBasedRangeChecks, false);
84  }
85
86  // On Power6 test for section size.
87  if (PowerArchitecturePPC64 == 6) {
88    determine_section_size();
89  // TODO: PPC port } else {
90  // TODO: PPC port PdScheduling::power6SectorSize = 0x20;
91  }
92
93  MaxVectorSize = 8;
94#endif
95
96  // Create and print feature-string.
97  char buf[(num_features+1) * 16]; // Max 16 chars per feature.
98  jio_snprintf(buf, sizeof(buf),
99               "ppc64%s%s%s%s%s%s%s%s",
100               (has_fsqrt()   ? " fsqrt"   : ""),
101               (has_isel()    ? " isel"    : ""),
102               (has_lxarxeh() ? " lxarxeh" : ""),
103               (has_cmpb()    ? " cmpb"    : ""),
104               //(has_mftgpr()? " mftgpr"  : ""),
105               (has_popcntb() ? " popcntb" : ""),
106               (has_popcntw() ? " popcntw" : ""),
107               (has_fcfids()  ? " fcfids"  : ""),
108               (has_vand()    ? " vand"    : "")
109               // Make sure number of %s matches num_features!
110              );
111  _features_str = strdup(buf);
112  NOT_PRODUCT(if (Verbose) print_features(););
113
114  // PPC64 supports 8-byte compare-exchange operations (see
115  // Atomic::cmpxchg and StubGenerator::generate_atomic_cmpxchg_ptr)
116  // and 'atomic long memory ops' (see Unsafe_GetLongVolatile).
117  _supports_cx8 = true;
118
119  UseSSE = 0; // Only on x86 and x64
120
121  intx cache_line_size = _measured_cache_line_size;
122
123  if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) AllocatePrefetchStyle = 1;
124
125  if (AllocatePrefetchStyle == 4) {
126    AllocatePrefetchStepSize = cache_line_size; // Need exact value.
127    if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 12; // Use larger blocks by default.
128    if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 2*cache_line_size; // Default is not defined?
129  } else {
130    if (cache_line_size > AllocatePrefetchStepSize) AllocatePrefetchStepSize = cache_line_size;
131    if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 3; // Optimistic value.
132    if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 3*cache_line_size; // Default is not defined?
133  }
134
135  assert(AllocatePrefetchLines > 0, "invalid value");
136  if (AllocatePrefetchLines < 1) // Set valid value in product VM.
137    AllocatePrefetchLines = 1; // Conservative value.
138
139  if (AllocatePrefetchStyle == 3 && AllocatePrefetchDistance < cache_line_size)
140    AllocatePrefetchStyle = 1; // Fall back if inappropriate.
141
142  assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive");
143}
144
145void VM_Version::print_features() {
146  tty->print_cr("Version: %s cache_line_size = %d", cpu_features(), (int) get_cache_line_size());
147}
148
149#ifdef COMPILER2
150// Determine section size on power6: If section size is 8 instructions,
151// there should be a difference between the two testloops of ~15 %. If
152// no difference is detected the section is assumed to be 32 instructions.
153void VM_Version::determine_section_size() {
154
155  int unroll = 80;
156
157  const int code_size = (2* unroll * 32 + 100)*BytesPerInstWord;
158
159  // Allocate space for the code.
160  ResourceMark rm;
161  CodeBuffer cb("detect_section_size", code_size, 0);
162  MacroAssembler* a = new MacroAssembler(&cb);
163
164  uint32_t *code = (uint32_t *)a->pc();
165  // Emit code.
166  void (*test1)() = (void(*)())(void *)a->function_entry();
167
168  Label l1;
169
170  a->li(R4, 1);
171  a->sldi(R4, R4, 28);
172  a->b(l1);
173  a->align(CodeEntryAlignment);
174
175  a->bind(l1);
176
177  for (int i = 0; i < unroll; i++) {
178    // Schleife 1
179    // ------- sector 0 ------------
180    // ;; 0
181    a->nop();                   // 1
182    a->fpnop0();                // 2
183    a->fpnop1();                // 3
184    a->addi(R4,R4, -1); // 4
185
186    // ;;  1
187    a->nop();                   // 5
188    a->fmr(F6, F6);             // 6
189    a->fmr(F7, F7);             // 7
190    a->endgroup();              // 8
191    // ------- sector 8 ------------
192
193    // ;;  2
194    a->nop();                   // 9
195    a->nop();                   // 10
196    a->fmr(F8, F8);             // 11
197    a->fmr(F9, F9);             // 12
198
199    // ;;  3
200    a->nop();                   // 13
201    a->fmr(F10, F10);           // 14
202    a->fmr(F11, F11);           // 15
203    a->endgroup();              // 16
204    // -------- sector 16 -------------
205
206    // ;;  4
207    a->nop();                   // 17
208    a->nop();                   // 18
209    a->fmr(F15, F15);           // 19
210    a->fmr(F16, F16);           // 20
211
212    // ;;  5
213    a->nop();                   // 21
214    a->fmr(F17, F17);           // 22
215    a->fmr(F18, F18);           // 23
216    a->endgroup();              // 24
217    // ------- sector 24  ------------
218
219    // ;;  6
220    a->nop();                   // 25
221    a->nop();                   // 26
222    a->fmr(F19, F19);           // 27
223    a->fmr(F20, F20);           // 28
224
225    // ;;  7
226    a->nop();                   // 29
227    a->fmr(F21, F21);           // 30
228    a->fmr(F22, F22);           // 31
229    a->brnop0();                // 32
230
231    // ------- sector 32 ------------
232  }
233
234  // ;; 8
235  a->cmpdi(CCR0, R4, unroll);   // 33
236  a->bge(CCR0, l1);             // 34
237  a->blr();
238
239  // Emit code.
240  void (*test2)() = (void(*)())(void *)a->function_entry();
241  // uint32_t *code = (uint32_t *)a->pc();
242
243  Label l2;
244
245  a->li(R4, 1);
246  a->sldi(R4, R4, 28);
247  a->b(l2);
248  a->align(CodeEntryAlignment);
249
250  a->bind(l2);
251
252  for (int i = 0; i < unroll; i++) {
253    // Schleife 2
254    // ------- sector 0 ------------
255    // ;; 0
256    a->brnop0();                  // 1
257    a->nop();                     // 2
258    //a->cmpdi(CCR0, R4, unroll);
259    a->fpnop0();                  // 3
260    a->fpnop1();                  // 4
261    a->addi(R4,R4, -1);           // 5
262
263    // ;; 1
264
265    a->nop();                     // 6
266    a->fmr(F6, F6);               // 7
267    a->fmr(F7, F7);               // 8
268    // ------- sector 8 ---------------
269
270    // ;; 2
271    a->endgroup();                // 9
272
273    // ;; 3
274    a->nop();                     // 10
275    a->nop();                     // 11
276    a->fmr(F8, F8);               // 12
277
278    // ;; 4
279    a->fmr(F9, F9);               // 13
280    a->nop();                     // 14
281    a->fmr(F10, F10);             // 15
282
283    // ;; 5
284    a->fmr(F11, F11);             // 16
285    // -------- sector 16 -------------
286
287    // ;; 6
288    a->endgroup();                // 17
289
290    // ;; 7
291    a->nop();                     // 18
292    a->nop();                     // 19
293    a->fmr(F15, F15);             // 20
294
295    // ;; 8
296    a->fmr(F16, F16);             // 21
297    a->nop();                     // 22
298    a->fmr(F17, F17);             // 23
299
300    // ;; 9
301    a->fmr(F18, F18);             // 24
302    // -------- sector 24 -------------
303
304    // ;; 10
305    a->endgroup();                // 25
306
307    // ;; 11
308    a->nop();                     // 26
309    a->nop();                     // 27
310    a->fmr(F19, F19);             // 28
311
312    // ;; 12
313    a->fmr(F20, F20);             // 29
314    a->nop();                     // 30
315    a->fmr(F21, F21);             // 31
316
317    // ;; 13
318    a->fmr(F22, F22);             // 32
319  }
320
321  // -------- sector 32 -------------
322  // ;; 14
323  a->cmpdi(CCR0, R4, unroll); // 33
324  a->bge(CCR0, l2);           // 34
325
326  a->blr();
327  uint32_t *code_end = (uint32_t *)a->pc();
328  a->flush();
329
330  double loop1_seconds,loop2_seconds, rel_diff;
331  uint64_t start1, stop1;
332
333  start1 = os::current_thread_cpu_time(false);
334  (*test1)();
335  stop1 = os::current_thread_cpu_time(false);
336  loop1_seconds = (stop1- start1) / (1000 *1000 *1000.0);
337
338
339  start1 = os::current_thread_cpu_time(false);
340  (*test2)();
341  stop1 = os::current_thread_cpu_time(false);
342
343  loop2_seconds = (stop1 - start1) / (1000 *1000 *1000.0);
344
345  rel_diff = (loop2_seconds - loop1_seconds) / loop1_seconds *100;
346
347  if (PrintAssembly) {
348    ttyLocker ttyl;
349    tty->print_cr("Decoding section size detection stub at " INTPTR_FORMAT " before execution:", p2i(code));
350    Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
351    tty->print_cr("Time loop1 :%f", loop1_seconds);
352    tty->print_cr("Time loop2 :%f", loop2_seconds);
353    tty->print_cr("(time2 - time1) / time1 = %f %%", rel_diff);
354
355    if (rel_diff > 12.0) {
356      tty->print_cr("Section Size 8 Instructions");
357    } else{
358      tty->print_cr("Section Size 32 Instructions or Power5");
359    }
360  }
361
362#if 0 // TODO: PPC port
363  // Set sector size (if not set explicitly).
364  if (FLAG_IS_DEFAULT(Power6SectorSize128PPC64)) {
365    if (rel_diff > 12.0) {
366      PdScheduling::power6SectorSize = 0x20;
367    } else {
368      PdScheduling::power6SectorSize = 0x80;
369    }
370  } else if (Power6SectorSize128PPC64) {
371    PdScheduling::power6SectorSize = 0x80;
372  } else {
373    PdScheduling::power6SectorSize = 0x20;
374  }
375#endif
376  if (UsePower6SchedulerPPC64) Unimplemented();
377}
378#endif // COMPILER2
379
380void VM_Version::determine_features() {
381#if defined(ABI_ELFv2)
382  const int code_size = (num_features+1+2*7)*BytesPerInstWord; // TODO(asmundak): calculation is incorrect.
383#else
384  // 7 InstWords for each call (function descriptor + blr instruction).
385  const int code_size = (num_features+1+2*7)*BytesPerInstWord;
386#endif
387  int features = 0;
388
389  // create test area
390  enum { BUFFER_SIZE = 2*4*K }; // Needs to be >=2* max cache line size (cache line size can't exceed min page size).
391  char test_area[BUFFER_SIZE];
392  char *mid_of_test_area = &test_area[BUFFER_SIZE>>1];
393
394  // Allocate space for the code.
395  ResourceMark rm;
396  CodeBuffer cb("detect_cpu_features", code_size, 0);
397  MacroAssembler* a = new MacroAssembler(&cb);
398
399  // Must be set to true so we can generate the test code.
400  _features = VM_Version::all_features_m;
401
402  // Emit code.
403  void (*test)(address addr, uint64_t offset)=(void(*)(address addr, uint64_t offset))(void *)a->function_entry();
404  uint32_t *code = (uint32_t *)a->pc();
405  // Don't use R0 in ldarx.
406  // Keep R3_ARG1 unmodified, it contains &field (see below).
407  // Keep R4_ARG2 unmodified, it contains offset = 0 (see below).
408  a->fsqrt(F3, F4);                            // code[0] -> fsqrt_m
409  a->fsqrts(F3, F4);                           // code[1] -> fsqrts_m
410  a->isel(R7, R5, R6, 0);                      // code[2] -> isel_m
411  a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3] -> lxarx_m
412  a->cmpb(R7, R5, R6);                         // code[4] -> bcmp
413  //a->mftgpr(R7, F3);                         // code[5] -> mftgpr
414  a->popcntb(R7, R5);                          // code[6] -> popcntb
415  a->popcntw(R7, R5);                          // code[7] -> popcntw
416  a->fcfids(F3, F4);                           // code[8] -> fcfids
417  a->vand(VR0, VR0, VR0);                      // code[9] -> vand
418  a->blr();
419
420  // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
421  void (*zero_cacheline_func_ptr)(char*) = (void(*)(char*))(void *)a->function_entry();
422  a->dcbz(R3_ARG1); // R3_ARG1 = addr
423  a->blr();
424
425  uint32_t *code_end = (uint32_t *)a->pc();
426  a->flush();
427  _features = VM_Version::unknown_m;
428
429  // Print the detection code.
430  if (PrintAssembly) {
431    ttyLocker ttyl;
432    tty->print_cr("Decoding cpu-feature detection stub at " INTPTR_FORMAT " before execution:", p2i(code));
433    Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
434  }
435
436  // Measure cache line size.
437  memset(test_area, 0xFF, BUFFER_SIZE); // Fill test area with 0xFF.
438  (*zero_cacheline_func_ptr)(mid_of_test_area); // Call function which executes dcbz to the middle.
439  int count = 0; // count zeroed bytes
440  for (int i = 0; i < BUFFER_SIZE; i++) if (test_area[i] == 0) count++;
441  guarantee(is_power_of_2(count), "cache line size needs to be a power of 2");
442  _measured_cache_line_size = count;
443
444  // Execute code. Illegal instructions will be replaced by 0 in the signal handler.
445  VM_Version::_is_determine_features_test_running = true;
446  (*test)((address)mid_of_test_area, (uint64_t)0);
447  VM_Version::_is_determine_features_test_running = false;
448
449  // determine which instructions are legal.
450  int feature_cntr = 0;
451  if (code[feature_cntr++]) features |= fsqrt_m;
452  if (code[feature_cntr++]) features |= fsqrts_m;
453  if (code[feature_cntr++]) features |= isel_m;
454  if (code[feature_cntr++]) features |= lxarxeh_m;
455  if (code[feature_cntr++]) features |= cmpb_m;
456  //if(code[feature_cntr++])features |= mftgpr_m;
457  if (code[feature_cntr++]) features |= popcntb_m;
458  if (code[feature_cntr++]) features |= popcntw_m;
459  if (code[feature_cntr++]) features |= fcfids_m;
460  if (code[feature_cntr++]) features |= vand_m;
461
462  // Print the detection code.
463  if (PrintAssembly) {
464    ttyLocker ttyl;
465    tty->print_cr("Decoding cpu-feature detection stub at " INTPTR_FORMAT " after execution:", p2i(code));
466    Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
467  }
468
469  _features = features;
470}
471
472
473static int saved_features = 0;
474
475void VM_Version::allow_all() {
476  saved_features = _features;
477  _features      = all_features_m;
478}
479
480void VM_Version::revert() {
481  _features = saved_features;
482}
483