150477Speter/* Target Definitions for NVPTX. 216186Salex Copyright (C) 2014-2020 Free Software Foundation, Inc. 316186Salex Contributed by Bernd Schmidt <bernds@codesourcery.com> 488064Sru 588064Sru This file is part of GCC. 616186Salex 716186Salex GCC is free software; you can redistribute it and/or modify it 8 under the terms of the GNU General Public License as published 9 by the Free Software Foundation; either version 3, or (at your 10 option) any later version. 11 12 GCC is distributed in the hope that it will be useful, but WITHOUT 13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21#ifndef GCC_NVPTX_H 22#define GCC_NVPTX_H 23 24#ifndef NVPTX_OPTS_H 25#include "config/nvptx/nvptx-opts.h" 26#endif 27 28/* Run-time Target. */ 29 30#define STARTFILE_SPEC "%{mmainkernel:crt0.o%s}" 31 32#define ASM_SPEC "%{misa=*:-m %*}" 33 34#define TARGET_CPU_CPP_BUILTINS() \ 35 do \ 36 { \ 37 builtin_assert ("machine=nvptx"); \ 38 builtin_assert ("cpu=nvptx"); \ 39 builtin_define ("__nvptx__"); \ 40 if (TARGET_SOFT_STACK) \ 41 builtin_define ("__nvptx_softstack__"); \ 42 if (TARGET_UNIFORM_SIMT) \ 43 builtin_define ("__nvptx_unisimt__"); \ 44 } while (0) 45 46/* Avoid the default in ../../gcc.c, which adds "-pthread", which is not 47 supported for nvptx. */ 48#define GOMP_SELF_SPECS "" 49 50/* Storage Layout. */ 51 52#define BITS_BIG_ENDIAN 0 53#define BYTES_BIG_ENDIAN 0 54#define WORDS_BIG_ENDIAN 0 55 56/* Chosen such that we won't have to deal with multi-word subregs. */ 57#define UNITS_PER_WORD 8 58 59/* Alignments in bits. */ 60#define PARM_BOUNDARY 32 61#define STACK_BOUNDARY 128 62#define FUNCTION_BOUNDARY 32 63#define BIGGEST_ALIGNMENT 128 64#define STRICT_ALIGNMENT 1 65 66#define MAX_STACK_ALIGNMENT (1024 * 8) 67 68#define DATA_ALIGNMENT nvptx_data_alignment 69 70/* Copied from elf.h and other places. We'd otherwise use 71 BIGGEST_ALIGNMENT and fail a number of testcases. */ 72#define MAX_OFILE_ALIGNMENT (32768 * 8) 73 74/* Type Layout. */ 75 76#define DEFAULT_SIGNED_CHAR 1 77 78#define SHORT_TYPE_SIZE 16 79#define INT_TYPE_SIZE 32 80#define LONG_TYPE_SIZE (TARGET_ABI64 ? 64 : 32) 81#define LONG_LONG_TYPE_SIZE 64 82#define FLOAT_TYPE_SIZE 32 83#define DOUBLE_TYPE_SIZE 64 84#define LONG_DOUBLE_TYPE_SIZE 64 85#define TARGET_SUPPORTS_WIDE_INT 1 86 87#undef SIZE_TYPE 88#define SIZE_TYPE (TARGET_ABI64 ? "long unsigned int" : "unsigned int") 89#undef PTRDIFF_TYPE 90#define PTRDIFF_TYPE (TARGET_ABI64 ? "long int" : "int") 91 92#define POINTER_SIZE (TARGET_ABI64 ? 64 : 32) 93#define Pmode (TARGET_ABI64 ? DImode : SImode) 94#define STACK_SIZE_MODE Pmode 95 96#define TARGET_SM35 (ptx_isa_option >= PTX_ISA_SM35) 97 98/* 'TARGET_PTX_*' not applicable before GCC 12. */ 99#define TARGET_PTX_6_0 false 100 101/* Registers. Since ptx is a virtual target, we just define a few 102 hard registers for special purposes and leave pseudos unallocated. 103 We have to have some available hard registers, to keep gcc setup 104 happy. */ 105#define FIRST_PSEUDO_REGISTER 16 106#define FIXED_REGISTERS { 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 } 107#define CALL_USED_REGISTERS { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 } 108 109/* Register Classes. */ 110enum reg_class { NO_REGS, ALL_REGS, LIM_REG_CLASSES }; 111#define REG_CLASS_NAMES { "NO_REGS", "ALL_REGS" } 112#define REG_CLASS_CONTENTS { { 0x0000 }, { 0xFFFF } } 113#define N_REG_CLASSES (int) LIM_REG_CLASSES 114 115#define GENERAL_REGS ALL_REGS 116#define REGNO_REG_CLASS(R) ((void)(R), ALL_REGS) 117#define BASE_REG_CLASS ALL_REGS 118#define INDEX_REG_CLASS NO_REGS 119 120#define REGNO_OK_FOR_BASE_P(X) true 121#define REGNO_OK_FOR_INDEX_P(X) false 122 123#define CLASS_MAX_NREGS(class, mode) \ 124 ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) 125 126#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ 127 if ((MODE) == QImode || (MODE) == HImode) \ 128 { \ 129 (MODE) = SImode; \ 130 (void)(UNSIGNEDP); \ 131 (void)(TYPE); \ 132 } 133 134/* Stack and Calling. */ 135 136#define FRAME_GROWS_DOWNWARD 0 137#define STACK_GROWS_DOWNWARD 1 138 139#define NVPTX_RETURN_REGNUM 0 140#define STACK_POINTER_REGNUM 1 141#define FRAME_POINTER_REGNUM 2 142#define ARG_POINTER_REGNUM 3 143#define STATIC_CHAIN_REGNUM 4 144/* This register points to the shared memory location with the current warp's 145 soft stack pointer (__nvptx_stacks[tid.y]). */ 146#define SOFTSTACK_SLOT_REGNUM 5 147/* This register is used to save the previous value of the soft stack pointer 148 in the prologue and restore it when returning. */ 149#define SOFTSTACK_PREV_REGNUM 6 150 151#define REGISTER_NAMES \ 152 { \ 153 "%value", "%stack", "%frame", "%args", \ 154 "%chain", "%sspslot", "%sspprev", "%hr7", \ 155 "%hr8", "%hr9", "%hr10", "%hr11", "%hr12", "%hr13", "%hr14", "%hr15" \ 156 } 157 158#define FIRST_PARM_OFFSET(FNDECL) ((void)(FNDECL), 0) 159#define PUSH_ARGS_REVERSED 1 160#define ACCUMULATE_OUTGOING_ARGS 1 161 162/* Avoid using the argument pointer for frame-related things. */ 163#define FRAME_POINTER_CFA_OFFSET(FNDECL) ((void)(FNDECL), 0) 164 165#ifdef HOST_WIDE_INT 166struct nvptx_args { 167 tree fntype; 168 /* Number of arguments passed in registers so far. */ 169 int count; 170}; 171#endif 172 173#define CUMULATIVE_ARGS struct nvptx_args 174 175#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \ 176 ((CUM).fntype = (FNTYPE), (CUM).count = 0, (void)0) 177 178#define FUNCTION_ARG_REGNO_P(r) 0 179 180#define DEFAULT_PCC_STRUCT_RETURN 0 181 182#define FUNCTION_PROFILER(file, labelno) \ 183 fatal_error (input_location, \ 184 "profiling is not yet implemented for this architecture") 185 186#define TRAMPOLINE_SIZE 32 187#define TRAMPOLINE_ALIGNMENT 256 188 189/* We don't run reload, so this isn't actually used, but it still needs to be 190 defined. Showing an argp->fp elimination also stops 191 expand_builtin_setjmp_receiver from generating invalid insns. */ 192#define ELIMINABLE_REGS \ 193 { \ 194 { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM} \ 195 } 196 197/* Define the offset between two registers, one to be eliminated, and the other 198 its replacement, at the start of a routine. */ 199 200#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ 201 ((OFFSET) = 0) 202 203/* Addressing Modes. */ 204 205#define MAX_REGS_PER_ADDRESS 1 206 207#define LEGITIMATE_PIC_OPERAND_P(X) 1 208 209 210#if defined HOST_WIDE_INT 211struct GTY(()) machine_function 212{ 213 rtx_expr_list *call_args; /* Arg list for the current call. */ 214 bool doing_call; /* Within a CALL_ARGS ... CALL_ARGS_END sequence. */ 215 bool is_varadic; /* This call is varadic */ 216 bool has_varadic; /* Current function has a varadic call. */ 217 bool has_chain; /* Current function has outgoing static chain. */ 218 bool has_softstack; /* Current function has a soft stack frame. */ 219 bool has_simtreg; /* Current function has an OpenMP SIMD region. */ 220 int num_args; /* Number of args of current call. */ 221 int return_mode; /* Return mode of current fn. 222 (machine_mode not defined yet.) */ 223 rtx axis_predicate[2]; /* Neutering predicates. */ 224 int axis_dim[2]; /* Maximum number of threads on each axis, dim[0] is 225 vector_length, dim[1] is num_workers. */ 226 bool axis_dim_init_p; 227 rtx bcast_partition; /* Register containing the size of each 228 vector's partition of share-memory used to 229 broadcast state. */ 230 rtx red_partition; /* Similar to bcast_partition, except for vector 231 reductions. */ 232 rtx sync_bar; /* Synchronization barrier ID for vectors. */ 233 rtx unisimt_master; /* 'Master lane index' for -muniform-simt. */ 234 rtx unisimt_predicate; /* Predicate for -muniform-simt. */ 235 rtx unisimt_location; /* Mask location for -muniform-simt. */ 236 /* The following two fields hold the maximum size resp. alignment required 237 for per-lane storage in OpenMP SIMD regions. */ 238 unsigned HOST_WIDE_INT simt_stack_size; 239 unsigned HOST_WIDE_INT simt_stack_align; 240}; 241#endif 242 243/* Costs. */ 244 245#define NO_FUNCTION_CSE 1 246#define SLOW_BYTE_ACCESS 0 247#define BRANCH_COST(speed_p, predictable_p) 6 248 249/* Assembler Format. */ 250 251#undef ASM_DECLARE_FUNCTION_NAME 252#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ 253 nvptx_declare_function_name (FILE, NAME, DECL) 254 255#undef ASM_DECLARE_FUNCTION_SIZE 256#define ASM_DECLARE_FUNCTION_SIZE(STREAM, NAME, DECL) \ 257 nvptx_function_end (STREAM) 258 259#define DWARF2_ASM_LINE_DEBUG_INFO 1 260 261#undef ASM_APP_ON 262#define ASM_APP_ON "\t// #APP \n" 263#undef ASM_APP_OFF 264#define ASM_APP_OFF "\t// #NO_APP \n" 265 266#define DBX_REGISTER_NUMBER(N) N 267 268#define TEXT_SECTION_ASM_OP "" 269#define DATA_SECTION_ASM_OP "" 270 271#undef ASM_GENERATE_INTERNAL_LABEL 272#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \ 273 do \ 274 { \ 275 char *__p; \ 276 __p = stpcpy (&(LABEL)[1], PREFIX); \ 277 (LABEL)[0] = '$'; \ 278 sprint_ul (__p, (unsigned long) (NUM)); \ 279 } \ 280 while (0) 281 282#define ASM_OUTPUT_ALIGN(FILE, POWER) \ 283 do \ 284 { \ 285 (void) (FILE); \ 286 (void) (POWER); \ 287 } \ 288 while (0) 289 290#define ASM_OUTPUT_SKIP(FILE, N) \ 291 nvptx_output_skip (FILE, N) 292 293#undef ASM_OUTPUT_ASCII 294#define ASM_OUTPUT_ASCII(FILE, STR, LENGTH) \ 295 nvptx_output_ascii (FILE, STR, LENGTH); 296 297#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \ 298 nvptx_declare_object_name (FILE, NAME, DECL) 299 300#undef ASM_OUTPUT_ALIGNED_DECL_COMMON 301#define ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN) \ 302 nvptx_output_aligned_decl (FILE, NAME, DECL, SIZE, ALIGN) 303 304#undef ASM_OUTPUT_ALIGNED_DECL_LOCAL 305#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN) \ 306 nvptx_output_aligned_decl (FILE, NAME, DECL, SIZE, ALIGN) 307 308#define CASE_VECTOR_PC_RELATIVE flag_pic 309#define JUMP_TABLES_IN_TEXT_SECTION flag_pic 310 311#define ADDR_VEC_ALIGN(VEC) (JUMP_TABLES_IN_TEXT_SECTION ? 5 : 2) 312 313/* Misc. */ 314 315#define DWARF2_LINENO_DEBUGGING_INFO 1 316 317#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ 318 ((VALUE) = GET_MODE_BITSIZE ((MODE)), 2) 319#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ 320 ((VALUE) = GET_MODE_BITSIZE ((MODE)), 2) 321 322#define SUPPORTS_WEAK 1 323#define NO_DOT_IN_LABEL 324#define ASM_COMMENT_START "//" 325 326#define STORE_FLAG_VALUE -1 327#define FLOAT_STORE_FLAG_VALUE(MODE) REAL_VALUE_ATOF("1.0", (MODE)) 328 329#define CASE_VECTOR_MODE SImode 330#define MOVE_MAX 8 331#define MOVE_RATIO(SPEED) 4 332#define FUNCTION_MODE QImode 333#define HAS_INIT_SECTION 1 334 335/* The C++ front end insists to link against libstdc++ -- which we don't build. 336 Tell it to instead link against the innocuous libgcc. */ 337#define LIBSTDCXX "gcc" 338 339#endif /* GCC_NVPTX_H */ 340