/* Target Definitions for NVPTX.
   Copyright (C) 2014-2020 Free Software Foundation, Inc.
   Contributed by Bernd Schmidt <bernds@codesourcery.com>

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
20
21#ifndef GCC_NVPTX_H
22#define GCC_NVPTX_H
23
24#ifndef NVPTX_OPTS_H
25#include "config/nvptx/nvptx-opts.h"
26#endif
27
/* Run-time Target.  */

/* Start files: crt0.o is requested only when -mmainkernel is given.  */
#define STARTFILE_SPEC "%{mmainkernel:crt0.o%s}"

/* Assembler options: a -misa=<value> option is passed on as
   "-m <value>".  */
#define ASM_SPEC "%{misa=*:-m %*}"

/* Register the preprocessor assertions and macros for this target.
   "__nvptx__" is always defined; the soft-stack and uniform-SIMT
   macros are defined only when the corresponding target flag is set.  */
#define TARGET_CPU_CPP_BUILTINS()                       \
  do                                                    \
    {                                                   \
      builtin_assert ("machine=nvptx");                 \
      builtin_assert ("cpu=nvptx");                     \
      builtin_define ("__nvptx__");                     \
      if (TARGET_SOFT_STACK)                            \
        builtin_define ("__nvptx_softstack__");         \
      if (TARGET_UNIFORM_SIMT)                          \
        builtin_define ("__nvptx_unisimt__");           \
    }                                                   \
  while (0)

/* Avoid the default in ../../gcc.c, which adds "-pthread", which is not
   supported for nvptx.  */
#define GOMP_SELF_SPECS ""
49
/* Storage Layout.  */

/* Bits, bytes and words are all little-endian.  */
#define BITS_BIG_ENDIAN 0
#define BYTES_BIG_ENDIAN 0
#define WORDS_BIG_ENDIAN 0

/* Chosen such that we won't have to deal with multi-word subregs.  */
#define UNITS_PER_WORD 8

/* Alignments in bits.  */
#define PARM_BOUNDARY 32
#define STACK_BOUNDARY 128
#define FUNCTION_BOUNDARY 32
#define BIGGEST_ALIGNMENT 128
#define STRICT_ALIGNMENT 1

/* Largest supported stack alignment, in bits (1024 bytes).  */
#define MAX_STACK_ALIGNMENT (1024 * 8)

/* Data alignment is computed by nvptx_data_alignment, which is defined
   outside this header.  */
#define DATA_ALIGNMENT nvptx_data_alignment

/* Copied from elf.h and other places.  We'd otherwise use
   BIGGEST_ALIGNMENT and fail a number of testcases.  */
#define MAX_OFILE_ALIGNMENT (32768 * 8)
73
/* Type Layout.  */

#define DEFAULT_SIGNED_CHAR 1

/* Sizes of the C types, in bits.  'long' follows the selected ABI.  */
#define SHORT_TYPE_SIZE 16
#define INT_TYPE_SIZE 32
#define LONG_TYPE_SIZE (TARGET_ABI64 ? 64 : 32)
#define LONG_LONG_TYPE_SIZE 64
#define FLOAT_TYPE_SIZE 32
#define DOUBLE_TYPE_SIZE 64
#define LONG_DOUBLE_TYPE_SIZE 64
#define TARGET_SUPPORTS_WIDE_INT 1

/* size_t and ptrdiff_t are pointer-sized, so they also follow the ABI.  */
#undef SIZE_TYPE
#define SIZE_TYPE (TARGET_ABI64 ? "long unsigned int" : "unsigned int")
#undef PTRDIFF_TYPE
#define PTRDIFF_TYPE (TARGET_ABI64 ? "long int" : "int")

/* Pointers are 64 bits wide (DImode) under the 64-bit ABI and 32 bits
   wide (SImode) otherwise.  */
#define POINTER_SIZE (TARGET_ABI64 ? 64 : 32)
#define Pmode (TARGET_ABI64 ? DImode : SImode)
#define STACK_SIZE_MODE Pmode

/* True when the selected PTX ISA is at least sm_35.  */
#define TARGET_SM35 (ptx_isa_option >= PTX_ISA_SM35)

/* 'TARGET_PTX_*' not applicable before GCC 12.  */
#define TARGET_PTX_6_0 false
100
/* Registers.  Since ptx is a virtual target, we just define a few
   hard registers for special purposes and leave pseudos unallocated.
   We have to have some available hard registers, to keep gcc setup
   happy.  */
#define FIRST_PSEUDO_REGISTER 16

/* One entry per hard register.  Registers 0-6 are fixed; every hard
   register is call-used.  */
#define FIXED_REGISTERS     { 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
#define CALL_USED_REGISTERS { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }
108
/* Register Classes.  There is a single non-empty class, ALL_REGS,
   covering all 16 hard registers.  */
enum reg_class             {  NO_REGS,    ALL_REGS,     LIM_REG_CLASSES };
#define REG_CLASS_NAMES    { "NO_REGS",  "ALL_REGS" }
#define REG_CLASS_CONTENTS { { 0x0000 }, { 0xFFFF } }
#define N_REG_CLASSES (int) LIM_REG_CLASSES

#define GENERAL_REGS ALL_REGS
/* Every register number maps to ALL_REGS; R is evaluated only to avoid
   an unused-argument diagnostic.  */
#define REGNO_REG_CLASS(R) ((void)(R), ALL_REGS)
#define BASE_REG_CLASS ALL_REGS
#define INDEX_REG_CLASS NO_REGS

/* Any register may be a base register; none may be an index register.  */
#define REGNO_OK_FOR_BASE_P(X) true
#define REGNO_OK_FOR_INDEX_P(X) false

/* Number of word-sized registers needed to hold a value of MODE, rounded
   up.  CLASS is ignored.  */
#define CLASS_MAX_NREGS(CLASS, MODE) \
  ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
125
/* Promote QImode and HImode values to SImode; every other mode is left
   untouched.  MODE must be an lvalue.  UNSIGNEDP and TYPE are evaluated
   only to silence unused-argument diagnostics.

   The body is wrapped in do { } while (0) so the macro expands to a single
   statement and can be used safely as the unbraced arm of an if/else.  */
#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE)             \
  do                                                    \
    {                                                   \
      if ((MODE) == QImode || (MODE) == HImode)         \
        {                                               \
          (MODE) = SImode;                              \
          (void)(UNSIGNEDP);                            \
          (void)(TYPE);                                 \
        }                                               \
    }                                                   \
  while (0)
133
/* Stack and Calling.  */

#define FRAME_GROWS_DOWNWARD 0
#define STACK_GROWS_DOWNWARD 1

/* Numbers of the special-purpose hard registers.  */
#define NVPTX_RETURN_REGNUM 0
#define STACK_POINTER_REGNUM 1
#define FRAME_POINTER_REGNUM 2
#define ARG_POINTER_REGNUM 3
#define STATIC_CHAIN_REGNUM 4
/* This register points to the shared memory location with the current warp's
   soft stack pointer (__nvptx_stacks[tid.y]).  */
#define SOFTSTACK_SLOT_REGNUM 5
/* This register is used to save the previous value of the soft stack pointer
   in the prologue and restore it when returning.  */
#define SOFTSTACK_PREV_REGNUM 6

/* Assembler names of the hard registers, indexed by register number.  */
#define REGISTER_NAMES                                                  \
  {                                                                     \
    "%value", "%stack", "%frame", "%args",                              \
    "%chain", "%sspslot", "%sspprev", "%hr7",                           \
    "%hr8", "%hr9", "%hr10", "%hr11", "%hr12", "%hr13", "%hr14", "%hr15" \
  }

/* The first parameter is at offset 0; FNDECL is evaluated only to avoid
   an unused-argument diagnostic.  */
#define FIRST_PARM_OFFSET(FNDECL) ((void)(FNDECL), 0)
#define PUSH_ARGS_REVERSED 1
#define ACCUMULATE_OUTGOING_ARGS 1

/* Avoid using the argument pointer for frame-related things.  */
#define FRAME_POINTER_CFA_OFFSET(FNDECL) ((void)(FNDECL), 0)
164
/* Argument-scanning state.  Declared only when HOST_WIDE_INT is defined.  */
#ifdef HOST_WIDE_INT
struct nvptx_args {
  /* Function type recorded by INIT_CUMULATIVE_ARGS.  */
  tree fntype;
  /* Number of arguments passed in registers so far.  */
  int count;
};
#endif

#define CUMULATIVE_ARGS struct nvptx_args

/* Reset CUM for a new argument scan: record FNTYPE and zero the argument
   count.  LIBNAME, FNDECL and N_NAMED_ARGS are not used.  The expansion
   has type void.  */
#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \
  ((CUM).fntype = (FNTYPE), (CUM).count = 0, (void)0)
177
/* No hard register is ever used to pass arguments.  */
#define FUNCTION_ARG_REGNO_P(r) 0

#define DEFAULT_PCC_STRUCT_RETURN 0

/* Profiling is unsupported: any attempt to emit profiler code reports a
   fatal error.  FILE and LABELNO are not used.  */
#define FUNCTION_PROFILER(file, labelno) \
  fatal_error (input_location, \
               "profiling is not yet implemented for this architecture")
185
#define TRAMPOLINE_SIZE 32
#define TRAMPOLINE_ALIGNMENT 256

/* We don't run reload, so this isn't actually used, but it still needs to be
   defined.  Showing an argp->fp elimination also stops
   expand_builtin_setjmp_receiver from generating invalid insns.  */
#define ELIMINABLE_REGS                                 \
  {                                                     \
    { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM }        \
  }

/* Define the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  The offset is always zero;
   FROM and TO are not used.  */

#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
  ((OFFSET) = 0)
202
/* Addressing Modes.  */

/* An address uses at most one register.  */
#define MAX_REGS_PER_ADDRESS 1

/* Every operand is accepted as a legitimate PIC operand.  */
#define LEGITIMATE_PIC_OPERAND_P(X) 1
208
209
/* Per-function target-specific state.  Declared only when HOST_WIDE_INT is
   defined, because the SIMT stack fields below use that type.  */
#if defined HOST_WIDE_INT
struct GTY(()) machine_function
{
  rtx_expr_list *call_args;  /* Arg list for the current call.  */
  bool doing_call; /* Within a CALL_ARGS ... CALL_ARGS_END sequence.  */
  bool is_varadic;  /* This call is variadic.  */
  bool has_varadic;  /* Current function has a variadic call.  */
  bool has_chain; /* Current function has outgoing static chain.  */
  bool has_softstack; /* Current function has a soft stack frame.  */
  bool has_simtreg; /* Current function has an OpenMP SIMD region.  */
  int num_args; /* Number of args of current call.  */
  int return_mode; /* Return mode of current fn.
                      (machine_mode not defined yet.) */
  rtx axis_predicate[2]; /* Neutering predicates.  */
  int axis_dim[2]; /* Maximum number of threads on each axis, dim[0] is
                      vector_length, dim[1] is num_workers.  */
  bool axis_dim_init_p; /* NOTE(review): presumably set once axis_dim has
                           been initialized -- confirm against users.  */
  rtx bcast_partition; /* Register containing the size of each
                          vector's partition of shared memory used to
                          broadcast state.  */
  rtx red_partition; /* Similar to bcast_partition, except for vector
                        reductions.  */
  rtx sync_bar; /* Synchronization barrier ID for vectors.  */
  rtx unisimt_master; /* 'Master lane index' for -muniform-simt.  */
  rtx unisimt_predicate; /* Predicate for -muniform-simt.  */
  rtx unisimt_location; /* Mask location for -muniform-simt.  */
  /* The following two fields hold the maximum size resp. alignment required
     for per-lane storage in OpenMP SIMD regions.  */
  unsigned HOST_WIDE_INT simt_stack_size;
  unsigned HOST_WIDE_INT simt_stack_align;
};
#endif
242
/* Costs.  */

#define NO_FUNCTION_CSE 1
#define SLOW_BYTE_ACCESS 0
/* Branches have a fixed cost of 6, regardless of SPEED_P and
   PREDICTABLE_P.  */
#define BRANCH_COST(speed_p, predictable_p) 6
248
/* Assembler Format.  */

/* Function headers are emitted by nvptx_declare_function_name.  */
#undef ASM_DECLARE_FUNCTION_NAME
#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL)             \
  nvptx_declare_function_name (FILE, NAME, DECL)

/* Function trailers are emitted by nvptx_function_end; NAME and DECL are
   not used.  */
#undef ASM_DECLARE_FUNCTION_SIZE
#define ASM_DECLARE_FUNCTION_SIZE(STREAM, NAME, DECL) \
  nvptx_function_end (STREAM)

#define DWARF2_ASM_LINE_DEBUG_INFO 1

/* Markers written around inline assembly, as "//" comments.  */
#undef ASM_APP_ON
#define ASM_APP_ON "\t// #APP \n"
#undef ASM_APP_OFF
#define ASM_APP_OFF "\t// #NO_APP \n"
265
/* Debug register numbers are the same as GCC register numbers.  The
   argument is parenthesized so that an operator expression passed as N
   keeps its value when the expansion is used inside a larger expression.  */
#define DBX_REGISTER_NUMBER(N) (N)

/* There are no section-switching directives for text or data.  */
#define TEXT_SECTION_ASM_OP ""
#define DATA_SECTION_ASM_OP ""
270
/* Build an internal label in LABEL: a leading '$', then PREFIX, then NUM
   formatted by sprint_ul (expected to append NUM as decimal text and the
   terminating NUL).  LABEL must be a writable buffer large enough for the
   result.  */
#undef  ASM_GENERATE_INTERNAL_LABEL
#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM)         \
  do                                                            \
    {                                                           \
      char *__p;                                                \
      /* Copy PREFIX after the '$' slot; __p ends up at its NUL.  */ \
      __p = stpcpy (&(LABEL)[1], PREFIX);                       \
      (LABEL)[0] = '$';                                         \
      sprint_ul (__p, (unsigned long) (NUM));                   \
    }                                                           \
  while (0)
281
/* Alignment requests produce no output; FILE and POWER are evaluated only
   to avoid unused-argument diagnostics.  */
#define ASM_OUTPUT_ALIGN(FILE, POWER)           \
  do                                            \
    {                                           \
      (void) (FILE);                            \
      (void) (POWER);                           \
    }                                           \
  while (0)

/* Skipping N bytes is delegated to nvptx_output_skip.  */
#define ASM_OUTPUT_SKIP(FILE, N)                \
  nvptx_output_skip (FILE, N)
292
/* Emit LENGTH bytes of STR through nvptx_output_ascii.  The expansion has
   no trailing semicolon: the caller supplies it, so the macro is a single
   statement and is safe as the unbraced arm of an if/else.  */
#undef  ASM_OUTPUT_ASCII
#define ASM_OUTPUT_ASCII(FILE, STR, LENGTH)                     \
  nvptx_output_ascii (FILE, STR, LENGTH)
296
/* Object headers are emitted by nvptx_declare_object_name.  */
#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL)       \
  nvptx_declare_object_name (FILE, NAME, DECL)

/* Common and local declarations share one emitter.  Note that the macro
   takes (DECL, NAME) while nvptx_output_aligned_decl takes (NAME, DECL).  */
#undef  ASM_OUTPUT_ALIGNED_DECL_COMMON
#define ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN)   \
  nvptx_output_aligned_decl (FILE, NAME, DECL, SIZE, ALIGN)

#undef  ASM_OUTPUT_ALIGNED_DECL_LOCAL
#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN)    \
  nvptx_output_aligned_decl (FILE, NAME, DECL, SIZE, ALIGN)

/* Jump tables are PC-relative and placed in the text section exactly when
   flag_pic is set.  */
#define CASE_VECTOR_PC_RELATIVE flag_pic
#define JUMP_TABLES_IN_TEXT_SECTION flag_pic

/* Alignment value for jump tables: 5 when they live in the text section,
   2 otherwise.  VEC is not used.  */
#define ADDR_VEC_ALIGN(VEC) (JUMP_TABLES_IN_TEXT_SECTION ? 5 : 2)
312
/* Misc.  */

#define DWARF2_LINENO_DEBUGGING_INFO 1

/* Both macros store the bit size of MODE in VALUE and evaluate to 2.  */
#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
  ((VALUE) = GET_MODE_BITSIZE (MODE), 2)
#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
  ((VALUE) = GET_MODE_BITSIZE (MODE), 2)

#define SUPPORTS_WEAK 1
#define NO_DOT_IN_LABEL
#define ASM_COMMENT_START "//"

/* Comparison results are stored as -1.  Parenthesized so the negative
   constant stays a single operand wherever the macro is expanded.  */
#define STORE_FLAG_VALUE (-1)
#define FLOAT_STORE_FLAG_VALUE(MODE) REAL_VALUE_ATOF("1.0", (MODE))

#define CASE_VECTOR_MODE SImode
#define MOVE_MAX 8
#define MOVE_RATIO(SPEED) 4
#define FUNCTION_MODE QImode
#define HAS_INIT_SECTION 1

/* The C++ front end insists on linking against libstdc++ -- which we don't
   build.  Tell it to link against the innocuous libgcc instead.  */
#define LIBSTDCXX "gcc"
338
339#endif /* GCC_NVPTX_H */
340