1/*	$OpenBSD: rtld_machine.c,v 1.70 2024/03/30 08:44:20 miod Exp $ */
2
3/*
4 * Copyright (c) 1999 Dale Rahn
5 * Copyright (c) 2001 Niklas Hallqvist
6 * Copyright (c) 2001 Artur Grabowski
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29/*-
30 * Copyright (c) 2000 Eduardo Horvath.
31 * Copyright (c) 1999 The NetBSD Foundation, Inc.
32 * All rights reserved.
33 *
34 * This code is derived from software contributed to The NetBSD Foundation
35 * by Paul Kranenburg.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 *    notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 *    notice, this list of conditions and the following disclaimer in the
44 *    documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 *    must display the following acknowledgement:
47 *	This product includes software developed by the NetBSD
48 *	Foundation, Inc. and its contributors.
49 * 4. Neither the name of The NetBSD Foundation nor the names of its
50 *    contributors may be used to endorse or promote products derived
51 *    from this software without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
54 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
57 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
58 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
59 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
61 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
62 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63 * POSSIBILITY OF SUCH DAMAGE.
64 */
65
66#define _DYN_LOADER
67
68#include <sys/types.h>
69#include <sys/exec_elf.h>
70#include <sys/syscall.h>
71#include <sys/unistd.h>
72
73#include <machine/reloc.h>
74#include <machine/trap.h>	/* for ST_SYSCALL */
75
76#include "util.h"
77#include "resolve.h"
78
/*
 * Cookie that _dl_bind() passes as the third argument of the kbind
 * system call.  It is placed in the .openbsd.randomdata section and is
 * hidden from other objects (presumably its value is filled in before
 * ld.so starts running -- confirm).
 */
int64_t pcookie __attribute__((section(".openbsd.randomdata"))) __dso_hidden;
80
/*
 * The following table holds for each relocation type:
 *	- the width in bits of the memory location the relocation
 *	  applies to (_RF_SZ, stored in bits 8-15)
 *	- the number of bits the relocation value must be shifted to the
 *	  right (i.e. discard least significant bits) to fit into
 *	  the appropriate field in the instruction word (_RF_RS, stored
 *	  in bits 0-7)
 *	- flags indicating whether
 *		* the relocation involves a symbol (_RF_S)
 *		* the relocation uses the addend (_RF_A)
 *		* the relocation is relative to the current position (_RF_P)
 *		* the relocation is for a GOT entry (_RF_G)
 *		* the relocation is relative to the load address (_RF_B)
 *		* the relocation target may be unaligned (_RF_U)
 *
 * The table is indexed by the relocation type number, so entries must
 * neither be reordered nor removed.
 */
#define _RF_S		0x80000000		/* Resolve symbol */
#define _RF_A		0x40000000		/* Use addend */
#define _RF_P		0x20000000		/* Location relative */
#define _RF_G		0x10000000		/* GOT offset */
#define _RF_B		0x08000000		/* Load address relative */
#define _RF_U		0x04000000		/* Unaligned */
#define _RF_SZ(s)	(((s) & 0xff) << 8)	/* memory target size */
#define _RF_RS(s)	((s) & 0xff)		/* right shift */
static const int reloc_target_flags[] = {
	0,							/* NONE */
	_RF_S|_RF_A|		_RF_SZ(8)  | _RF_RS(0),		/* RELOC_8 */
	_RF_S|_RF_A|		_RF_SZ(16) | _RF_RS(0),		/* RELOC_16 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* RELOC_32 */
	_RF_S|_RF_A|_RF_P|	_RF_SZ(8)  | _RF_RS(0),		/* DISP_8 */
	_RF_S|_RF_A|_RF_P|	_RF_SZ(16) | _RF_RS(0),		/* DISP_16 */
	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(0),		/* DISP_32 */
	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(2),		/* WDISP_30 */
	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(2),		/* WDISP_22 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(10),	/* HI22 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* 22 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* 13 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* LO10 */
	_RF_G|			_RF_SZ(32) | _RF_RS(0),		/* GOT10 */
	_RF_G|			_RF_SZ(32) | _RF_RS(0),		/* GOT13 */
	_RF_G|			_RF_SZ(32) | _RF_RS(10),	/* GOT22 */
	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(0),		/* PC10 */
	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(10),	/* PC22 */
	      _RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(2),		/* WPLT30 */
	_RF_S|			_RF_SZ(32) | _RF_RS(0),		/* COPY */
	_RF_S|_RF_A|		_RF_SZ(64) | _RF_RS(0),		/* GLOB_DAT */
	_RF_S|			_RF_SZ(32) | _RF_RS(0),		/* JMP_SLOT */
	      _RF_A|	_RF_B|	_RF_SZ(64) | _RF_RS(0),		/* RELATIVE */
	_RF_S|_RF_A|	_RF_U|	_RF_SZ(32) | _RF_RS(0),		/* UA_32 */

	      _RF_A|		_RF_SZ(32) | _RF_RS(0),		/* PLT32 */
	      _RF_A|		_RF_SZ(32) | _RF_RS(10),	/* HIPLT22 */
	      _RF_A|		_RF_SZ(32) | _RF_RS(0),		/* LOPLT10 */
	      _RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(0),		/* PCPLT32 */
	      _RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(10),	/* PCPLT22 */
	      _RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(0),		/* PCPLT10 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* 10 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* 11 */
	_RF_S|_RF_A|		_RF_SZ(64) | _RF_RS(0),		/* 64 */
	_RF_S|_RF_A|/*extra*/	_RF_SZ(32) | _RF_RS(0),		/* OLO10 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(42),	/* HH22 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(32),	/* HM10 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(10),	/* LM22 */
	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(42),	/* PC_HH22 */
	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(32),	/* PC_HM10 */
	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(10),	/* PC_LM22 */
	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(2),		/* WDISP16 */
	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(2),		/* WDISP19 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* GLOB_JMP */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* 7 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* 5 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* 6 */
	_RF_S|_RF_A|_RF_P|	_RF_SZ(64) | _RF_RS(0),		/* DISP64 */
	      _RF_A|		_RF_SZ(64) | _RF_RS(0),		/* PLT64 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(10),	/* HIX22 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* LOX10 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(22),	/* H44 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(12),	/* M44 */
	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* L44 */
	_RF_S|_RF_A|		_RF_SZ(64) | _RF_RS(0),		/* REGISTER */
	_RF_S|_RF_A|	_RF_U|	_RF_SZ(64) | _RF_RS(0),		/* UA64 */
	_RF_S|_RF_A|	_RF_U|	_RF_SZ(16) | _RF_RS(0),		/* UA16 */
};
162
/*
 * Accessors for the properties packed into reloc_target_flags[]; `t' is
 * the relocation type number used as the table index.  The flag tests
 * evaluate to 0 or 1, the last two to the 8-bit size and shift fields.
 */
#define RELOC_RESOLVE_SYMBOL(t)		(!!(reloc_target_flags[(t)] & _RF_S))
#define RELOC_PC_RELATIVE(t)		(!!(reloc_target_flags[(t)] & _RF_P))
#define RELOC_BASE_RELATIVE(t)		(!!(reloc_target_flags[(t)] & _RF_B))
#define RELOC_UNALIGNED(t)		(!!(reloc_target_flags[(t)] & _RF_U))
#define RELOC_USE_ADDEND(t)		(!!(reloc_target_flags[(t)] & _RF_A))
#define RELOC_TARGET_SIZE(t)		((reloc_target_flags[(t)] & 0xff00) >> 8)
#define RELOC_VALUE_RIGHTSHIFT(t)	((reloc_target_flags[(t)]) & 0xff)
170
/*
 * For each relocation type, the mask of the bits in the target location
 * that are replaced by the (already right-shifted) relocation value;
 * -1 selects every bit.  The table is indexed by the relocation type
 * number and must list the types in the same order as
 * reloc_target_flags[].
 */
#define _BM(x)	(~(-(1ULL << (x))))	/* mask with the low x bits set */
static const long reloc_target_bitmask[] = {
	0,				/* NONE */
	_BM(8), _BM(16), _BM(32),	/* RELOC_8, _16, _32 */
	_BM(8), _BM(16), _BM(32),	/* DISP8, DISP16, DISP32 */
	_BM(30), _BM(22),		/* WDISP30, WDISP22 */
	_BM(22), _BM(22),		/* HI22, _22 */
	_BM(13), _BM(10),		/* RELOC_13, _LO10 */
	_BM(10), _BM(13), _BM(22),	/* GOT10, GOT13, GOT22 */
	_BM(10), _BM(22),		/* _PC10, _PC22 */
	_BM(30), 0,			/* _WPLT30, _COPY */
	-1, _BM(32), -1,		/* _GLOB_DAT, JMP_SLOT, _RELATIVE */
	_BM(32), _BM(32),		/* _UA32, PLT32 */
	_BM(22), _BM(10),		/* _HIPLT22, LOPLT10 */
	_BM(32), _BM(22), _BM(10),	/* _PCPLT32, _PCPLT22, _PCPLT10 */
	_BM(10), _BM(11), -1,		/* _10, _11, _64 */
	_BM(10), _BM(22),		/* _OLO10, _HH22 */
	_BM(10), _BM(22),		/* _HM10, _LM22 */
	_BM(22), _BM(10), _BM(22),	/* _PC_HH22, _PC_HM10, _PC_LM22 */
	_BM(16), _BM(19),		/* _WDISP16, _WDISP19 */
	-1,				/* GLOB_JMP */
	/*
	 * The comma after _BM(6) matters: without it the initializer
	 * reads `_BM(6) - 1' and every following mask moves down one slot,
	 * leaving the table one entry short.
	 */
	_BM(7), _BM(5), _BM(6),		/* _7, _5, _6 */
	-1, -1,				/* DISP64, PLT64 */
	_BM(22), _BM(13),		/* HIX22, LOX10 */
	_BM(22), _BM(10), _BM(13),	/* H44, M44, L44 */
	-1, -1, _BM(16),		/* REGISTER, UA64, UA16 */
};
#undef _BM
#define RELOC_VALUE_BITMASK(t)	(reloc_target_bitmask[t])
200
/*
 * Forward declarations of the PLT helpers defined further down in this
 * file: _dl_reloc_plt() computes the words that redirect one PLT entry to
 * `value', _dl_install_plt() writes a stub that jumps to `proc'.
 */
int _dl_reloc_plt(Elf_Word *where1, Elf_Word *where2, Elf_Word *pltaddr,
	Elf_Addr value);
void _dl_install_plt(Elf_Word *pltgot, Elf_Addr proc);
204
205int
206_dl_md_reloc(elf_object_t *object, int rel, int relasz)
207{
208	long	i;
209	long	numrela;
210	long	relrel;
211	int	fails = 0;
212	Elf_Addr loff;
213	Elf_Addr prev_value = 0;
214	const Elf_Sym *prev_sym = NULL;
215	Elf_RelA *relas;
216
217	loff = object->obj_base;
218	numrela = object->Dyn.info[relasz] / sizeof(Elf_RelA);
219	relrel = rel == DT_RELA ? object->relacount : 0;
220	relas = (Elf_RelA *)(object->Dyn.info[rel]);
221
222	if (relas == NULL)
223		return 0;
224
225	if (relrel > numrela)
226		_dl_die("relacount > numrel: %ld > %ld", relrel, numrela);
227
228	/* tight loop for leading RELATIVE relocs */
229	for (i = 0; i < relrel; i++, relas++) {
230		Elf_Addr *where;
231
232		where = (Elf_Addr *)(relas->r_offset + loff);
233		*where = relas->r_addend + loff;
234	}
235	for (; i < numrela; i++, relas++) {
236		Elf_Addr *where, value, mask;
237		Elf_Word type;
238		const Elf_Sym *sym;
239		const char *symn;
240
241		type = ELF_R_TYPE(relas->r_info);
242
243		if (type == R_TYPE(NONE) || type == R_TYPE(JMP_SLOT))
244			continue;
245
246		where = (Elf_Addr *)(relas->r_offset + loff);
247
248		if (RELOC_USE_ADDEND(type))
249			value = relas->r_addend;
250		else
251			value = 0;
252
253		sym = NULL;
254		symn = NULL;
255		if (RELOC_RESOLVE_SYMBOL(type)) {
256			sym = object->dyn.symtab;
257			sym += ELF_R_SYM(relas->r_info);
258			symn = object->dyn.strtab + sym->st_name;
259
260			if (sym->st_shndx != SHN_UNDEF &&
261			    ELF_ST_BIND(sym->st_info) == STB_LOCAL) {
262				value += loff;
263			} else if (sym == prev_sym) {
264				value += prev_value;
265			} else {
266				struct sym_res sr;
267
268				sr = _dl_find_symbol(symn,
269				    SYM_SEARCH_ALL|SYM_WARNNOTFOUND|SYM_NOTPLT,
270				    sym, object);
271				if (sr.sym == NULL) {
272resolve_failed:
273					if (ELF_ST_BIND(sym->st_info) !=
274					    STB_WEAK)
275						fails++;
276					continue;
277				}
278				prev_sym = sym;
279				prev_value = (Elf_Addr)(sr.obj->obj_base +
280				    sr.sym->st_value);
281				value += prev_value;
282			}
283		}
284
285		if (type == R_TYPE(COPY)) {
286			void *dstaddr = where;
287			const void *srcaddr;
288			const Elf_Sym *dstsym = sym;
289			struct sym_res sr;
290
291			sr = _dl_find_symbol(symn,
292			    SYM_SEARCH_OTHER|SYM_WARNNOTFOUND|SYM_NOTPLT,
293			    dstsym, object);
294			if (sr.sym == NULL)
295				goto resolve_failed;
296
297			srcaddr = (void *)(sr.obj->obj_base + sr.sym->st_value);
298			_dl_bcopy(srcaddr, dstaddr, dstsym->st_size);
299			continue;
300		}
301
302		if (RELOC_PC_RELATIVE(type))
303			value -= (Elf_Addr)where;
304		if (RELOC_BASE_RELATIVE(type))
305			value += loff;
306
307		mask = RELOC_VALUE_BITMASK(type);
308		value >>= RELOC_VALUE_RIGHTSHIFT(type);
309		value &= mask;
310
311		if (RELOC_UNALIGNED(type)) {
312			/* Handle unaligned relocations. */
313			Elf_Addr tmp = 0;
314			char *ptr = (char *)where;
315			int i, size = RELOC_TARGET_SIZE(type)/8;
316
317			/* Read it in one byte at a time. */
318			for (i=0; i<size; i++)
319				tmp = (tmp << 8) | ptr[i];
320
321			tmp &= ~mask;
322			tmp |= value;
323
324			/* Write it back out. */
325			for (i=0; i<size; i++)
326				ptr[i] = ((tmp >> (8*i)) & 0xff);
327		} else if (RELOC_TARGET_SIZE(type) > 32) {
328			*where &= ~mask;
329			*where |= value;
330		} else {
331			Elf32_Addr *where32 = (Elf32_Addr *)where;
332
333			*where32 &= ~mask;
334			*where32 |= value;
335		}
336	}
337
338	return fails;
339}
340
/*
 * Instruction templates:
 *
 * Each constant is an instruction word whose immediate or displacement
 * field is zero; the code below ORs the computed field value into it.
 *
 * NOTE(review): the comment on ORG15 says "or", but 0x82804005 looks
 * like the encoding of "addcc %g1, %g5, %g1" (a plain "or" would be
 * 0x82104005).  The two give the same sum when the operands share no set
 * bits, as in the 64-bit sequence built by _dl_reloc_plt() -- confirm
 * against the SPARC V9 opcode tables before changing it.
 */

#define	BAA	0x30680000	/*	ba,a	%xcc, 0 */
#define	SETHI	0x03000000	/*	sethi	%hi(0), %g1 */
#define	JMP	0x81c06000	/*	jmpl	%g1+%lo(0), %g0	  <-- simm13 */
#define	NOP	0x01000000	/*	sethi	%hi(0), %g0 */
#define	OR	0x82106000	/*	or	%g1, 0, %g1 */
#define	ORG5	0x8a116000	/*	or	%g5, 0, %g5 */
#define	XOR	0x82186000	/*	xor	%g1, 0, %g1 */
#define	MOV71	0x8210000f	/*	or	%o7, 0, %g1 */
#define	MOV17	0x9e100001	/*	or	%g1, 0, %o7 */
#define	CALL	0x40000000	/*	call	0	  <-- disp30 */
#define	SLLX	0x83287000	/*	sllx	%g1, 0, %g1 */
#define	SLLXG5	0x8b297000	/*	sllx	%g5, 0, %g5 */
#define	SRAX	0x83387000	/*	srax	%g1, 0, %g1 */
#define	SETHIG5	0x0b000000	/*	sethi	%hi(0), %g5 */
#define	ORG15	0x82804005	/*	or	%g1, %g5, %g1 */


/*
 * HIVAL(v, s): the 22 bits of v starting at bit s, i.e. %hi(v) with a
 * variable shift, ready to be ORed into a sethi template.
 * LOVAL(v): the low 10 bits of v, i.e. %lo(v).
 */
#define	HIVAL(v, s)	(((v) >> (s)) &  0x003fffff)
#define LOVAL(v)	((v) & 0x000003ff)
365
366int
367_dl_reloc_plt(Elf_Word *where1, Elf_Word *where2, Elf_Word *pltaddr,
368    Elf_Addr value)
369{
370	Elf_Addr offset;
371
372	/*
373	 * At the PLT entry pointed at by `where', we now construct
374	 * a direct transfer to the now fully resolved function
375	 * address.
376	 *
377	 * A PLT entry is supposed to start by looking like this:
378	 *
379	 *	sethi	%hi(. - .PLT0), %g1
380	 *	ba,a,pt	%xcc, .PLT1
381	 *	nop
382	 *	nop
383	 *	nop
384	 *	nop
385	 *	nop
386	 *	nop
387	 *
388	 * When we replace these entries we either (a) only replace
389	 * the second word (the ba,a,pt), or (b) replace multiple
390	 * words: one or more nops, then finally the ba,a,pt.  By
391	 * replacing the ba,a,pt last, we guarantee that the PLT can
392	 * be used by other threads even while it's being updated.
393	 * This is made slightly more complicated by kbind, for which
394	 * we need to pass them to the kernel in the order they get
395	 * written.  To that end, we store the word to overwrite the
396	 * ba,a,pt at *where1, and the words to overwrite the nops at
397	 * where2[0], where2[1], ...
398	 *
399	 * We now need to find out how far we need to jump.  We
400	 * have a choice of several different relocation techniques
401	 * which are increasingly expensive.
402	 */
403
404	offset = value - ((Elf_Addr)pltaddr);
405	if ((int64_t)(offset-4) <= (1L<<20) &&
406	    (int64_t)(offset-4) >= -(1L<<20)) {
407		/*
408		 * We're within 1MB -- we can use a direct branch insn.
409		 *
410		 * We can generate this pattern:
411		 *
412		 *	sethi	%hi(. - .PLT0), %g1
413		 *	ba,a,pt	%xcc, addr
414		 *	nop
415		 *	nop
416		 *	nop
417		 *	nop
418		 *	nop
419		 *	nop
420		 *
421		 */
422		*where1 = BAA | (((offset-4) >> 2) &0x7ffff);
423		return 0;
424	} else if (value < (1UL<<32)) {
425		/*
426		 * We're within 32-bits of address zero.
427		 *
428		 * The resulting code in the jump slot is:
429		 *
430		 *	sethi	%hi(. - .PLT0), %g1
431		 *	sethi	%hi(addr), %g1
432		 *	jmp	%g1+%lo(addr)
433		 *	nop
434		 *	nop
435		 *	nop
436		 *	nop
437		 *	nop
438		 *
439		 */
440		*where1 = SETHI | HIVAL(value, 10);
441		where2[0] = JMP   | LOVAL(value);
442		return 1;
443	} else if (value > -(1UL<<32)) {
444		/*
445		 * We're within 32-bits of address -1.
446		 *
447		 * The resulting code in the jump slot is:
448		 *
449		 *	sethi	%hi(. - .PLT0), %g1
450		 *	sethi	%hix(~addr), %g1
451		 *	xor	%g1, %lox(~addr), %g1
452		 *	jmp	%g1
453		 *	nop
454		 *	nop
455		 *	nop
456		 *	nop
457		 *
458		 */
459		*where1 = SETHI | HIVAL(~value, 10);
460		where2[0] = XOR | ((~value) & 0x00001fff);
461		where2[1] = JMP;
462		return 2;
463	} else if ((int64_t)(offset-8) <= (1L<<31) &&
464	    (int64_t)(offset-8) >= -((1L<<31) - 4)) {
465		/*
466		 * We're within 32-bits -- we can use a direct call insn
467		 *
468		 * The resulting code in the jump slot is:
469		 *
470		 *	sethi	%hi(. - .PLT0), %g1
471		 *	mov	%o7, %g1
472		 *	call	(.+offset)
473		 *	 mov	%g1, %o7
474		 *	nop
475		 *	nop
476		 *	nop
477		 *	nop
478		 *
479		 */
480		*where1 = MOV71;
481		where2[0] = CALL | (((offset-8) >> 2) & 0x3fffffff);
482		where2[1] = MOV17;
483		return 2;
484	} else if (value < (1L<<42)) {
485		/*
486		 * Target 42bits or smaller.
487		 *
488		 * The resulting code in the jump slot is:
489		 *
490		 *	sethi	%hi(. - .PLT0), %g1
491		 *	sethi	%hi(addr >> 20), %g1
492		 *	or	%g1, %lo(addr >> 10), %g1
493		 *	sllx	%g1, 10, %g1
494		 *	jmp	%g1+%lo(addr)
495		 *	nop
496		 *	nop
497		 *	nop
498		 *
499		 * this can handle addresses 0 - 0x3fffffffffc
500		 */
501		*where1 = SETHI | HIVAL(value, 20);
502		where2[0] = OR    | LOVAL(value >> 10);
503		where2[1] = SLLX  | 10;
504		where2[2] = JMP   | LOVAL(value);
505		return 3;
506	} else if (value > -(1UL<<41)) {
507		/*
508		 * Large target >= 0xfffffe0000000000UL
509		 *
510		 * The resulting code in the jump slot is:
511		 *
512		 *	sethi	%hi(. - .PLT0), %g1
513		 *	sethi	%hi(addr >> 20), %g1
514		 *	or	%g1, %lo(addr >> 10), %g1
515		 *	sllx	%g1, 32, %g1
516		 *	srax	%g1, 22, %g1
517		 *	jmp	%g1+%lo(addr)
518		 *	nop
519		 *	nop
520		 *	nop
521		 *
522		 */
523		*where1 = SETHI | HIVAL(value, 20);
524		where2[0] = OR   | LOVAL(value >> 10);
525		where2[1] = SLLX  | 32;
526		where2[2] = SRAX  | 22;
527		where2[3] = JMP   | LOVAL(value);
528		return 4;
529	} else {
530		/*
531		 * We need to load all 64-bits
532		 *
533		 * The resulting code in the jump slot is:
534		 *
535		 *	sethi	%hi(. - .PLT0), %g1
536		 *	sethi	%hi(addr >> 42), %g5
537		 *	sethi	%hi(addr >> 10), %g1
538		 *	or	%g1, %lo(addr >> 32), %g5
539		 *	sllx	%g5, 32, %g5
540		 *	or	%g1, %g5, %g1
541		 *	jmp	%g1+%lo(addr)
542		 *	nop
543		 *
544		 */
545		*where1 = SETHIG5 | HIVAL(value, 42);
546		where2[0] = SETHI | HIVAL(value, 10);
547		where2[1] = ORG5 | LOVAL(value >> 32);
548		where2[2] = SLLXG5 | 32;
549		where2[3] = ORG15;
550		where2[4] = JMP | LOVAL(value);
551		return 5;
552	}
553}
554
555/*
556 * Resolve a symbol at run-time.
557 */
558Elf_Addr
559_dl_bind(elf_object_t *object, int index)
560{
561	Elf_RelA *rela;
562	Elf_Word *addr;
563	Elf_Addr newvalue;
564	struct sym_res sr;
565	const Elf_Sym *sym;
566	const char *symn;
567	int64_t cookie = pcookie;
568	struct {
569		struct __kbind param[2];
570		Elf_Word newval[6];
571	} buf;
572	struct __kbind *param;
573	size_t psize;
574	int i;
575
576	rela = (Elf_RelA *)(object->Dyn.info[DT_JMPREL]);
577	if (ELF_R_TYPE(rela->r_info) == R_TYPE(JMP_SLOT)) {
578		/*
579		 * XXXX
580		 *
581		 * The first four PLT entries are reserved.  There
582		 * is some disagreement whether they should have
583		 * associated relocation entries.  Both the SPARC
584		 * 32-bit and 64-bit ELF specifications say that
585		 * they should have relocation entries, but the
586		 * 32-bit SPARC binutils do not generate them,
587		 * and now the 64-bit SPARC binutils have stopped
588		 * generating them too.
589		 *
590		 * So, to provide binary compatibility, we will
591		 * check the first entry, if it is reserved it
592		 * should not be of the type JMP_SLOT.  If it
593		 * is JMP_SLOT, then the 4 reserved entries were
594		 * not generated and our index is 4 entries too far.
595		 */
596		rela += index - 4;
597	} else
598		rela += index;
599
600	sym = object->dyn.symtab;
601	sym += ELF_R_SYM(rela->r_info);
602	symn = object->dyn.strtab + sym->st_name;
603
604	sr = _dl_find_symbol(symn, SYM_SEARCH_ALL|SYM_WARNNOTFOUND|SYM_PLT,
605	    sym, object);
606	if (sr.sym == NULL)
607		_dl_die("lazy binding failed!");
608
609	newvalue = sr.obj->obj_base + sr.sym->st_value;
610
611	if (__predict_false(sr.obj->traced) && _dl_trace_plt(sr.obj, symn))
612		return newvalue;
613
614	/*
615	 * While some relocations just need to write one word and
616	 * can do that with kbind() with just one block, many
617	 * require two blocks to be written: all but first word,
618	 * then the first word.  So, if we want to write 5 words
619	 * in total, then the layout of the buffer we pass to
620	 * kbind() needs to be one of these:
621	 *   +------------+
622	 *   | kbind.addr |
623	 *   |     """    |
624	 *   | kbind.size |
625	 *   |     """    |		+------------+
626	 *   | kbind.addr |		| kbind.addr |
627	 *   |     """    |		|     """    |
628	 *   | kbind.size |		| kbind.size |
629	 *   |     """    |		|     """    |
630	 *   |   word 2   |		|    word    |
631	 *   |   word 3   |		+------------+
632	 *   |   word 4   |
633	 *   |   word 5   |
634	 *   |   word 1   |
635	 *   +------------+
636	 *
637	 * We first handle the special case of relocations with a
638	 * non-zero r_addend, which have one block to update whose
639	 * address is the relocation address itself.  This is only
640	 * used for PLT entries after the 2^15th, i.e., truly monstrous
641	 * programs, thus the __predict_false().
642	 */
643	addr = (Elf_Word *)(object->obj_base + rela->r_offset);
644	_dl_memset(&buf, 0, sizeof(buf));
645	if (__predict_false(rela->r_addend)) {
646		/*
647		 * This entry is >32768.  The relocation points to a
648		 * PC-relative pointer to the _dl_bind_start_0 stub at
649		 * the top of the PLT section.  Update it to point to
650		 * the target function.
651		 */
652		buf.newval[0] = rela->r_addend + newvalue
653		    - object->Dyn.info[DT_PLTGOT];
654		buf.param[1].kb_addr = addr;
655		buf.param[1].kb_size = sizeof(buf.newval[0]);
656		param = &buf.param[1];
657		psize = sizeof(struct __kbind) + sizeof(buf.newval[0]);
658	} else {
659		Elf_Word first;
660
661		/*
662		 * For the other relocations, the word at the relocation
663		 * address will be left unchanged.  Assume _dl_reloc_plt()
664		 * will tell us to update multiple words, so save the first
665		 * word to the side.
666		 */
667		i = _dl_reloc_plt(&first, &buf.newval[0], addr, newvalue);
668
669		/*
670		 * _dl_reloc_plt() returns the number of words that must be
671		 * written after the first word in location, but before it
672		 * in time.  If it returns zero, then only a single block
673		 * with one word is needed, so we just put it in place per
674		 * the right-hand diagram and just use param[1] and newval[0]
675		 */
676		if (i == 0) {
677			/* fill in the __kbind structure */
678			buf.param[1].kb_addr = &addr[1];
679			buf.param[1].kb_size = sizeof(Elf_Word);
680			buf.newval[0] = first;
681			param = &buf.param[1];
682			psize = sizeof(struct __kbind) + sizeof(buf.newval[0]);
683		} else {
684			/*
685			 * Two blocks are necessary.  Save the first word
686			 * after the other words.
687			 */
688			buf.param[0].kb_addr = &addr[2];
689			buf.param[0].kb_size = i * sizeof(Elf_Word);
690			buf.param[1].kb_addr = &addr[1];
691			buf.param[1].kb_size = sizeof(Elf_Word);
692			buf.newval[i] = first;
693			param = &buf.param[0];
694			psize = 2 * sizeof(struct __kbind) +
695			    (i + 1) * sizeof(buf.newval[0]);
696		}
697	}
698
699	/* directly code the syscall, so that it's actually inline here */
700	{
701		register long syscall_num __asm("g1") = SYS_kbind;
702		register void *arg1 __asm("o0") = param;
703		register long  arg2 __asm("o1") = psize;
704		register long  arg3 __asm("o2") = cookie;
705
706		__asm volatile("t %2" : "+r" (arg1), "+r" (arg2)
707		    : "i" (ST_SYSCALL), "r" (syscall_num), "r" (arg3)
708		    : "cc", "memory");
709	}
710
711	return newvalue;
712}
713
714/*
715 * Install rtld function call into this PLT slot.
716 */
717#define SAVE		0x9de3bf50
718#define SETHI_l0	0x21000000
719#define SETHI_l1	0x23000000
720#define OR_l0_l0	0xa0142000
721#define SLLX_l0_32_l0	0xa12c3020
722#define OR_l0_l1_l0	0xa0140011
723#define JMPL_l0_o1	0x93c42000
724#define MOV_g1_o0	0x90100001
725
726void
727_dl_install_plt(Elf_Word *pltgot, Elf_Addr proc)
728{
729	pltgot[0] = SAVE;
730	pltgot[1] = SETHI_l0  | HIVAL(proc, 42);
731	pltgot[2] = SETHI_l1  | HIVAL(proc, 10);
732	pltgot[3] = OR_l0_l0  | LOVAL((proc) >> 32);
733	pltgot[4] = SLLX_l0_32_l0;
734	pltgot[5] = OR_l0_l1_l0;
735	pltgot[6] = JMPL_l0_o1 | LOVAL(proc);
736	pltgot[7] = MOV_g1_o0;
737}
738
/*
 * Entry points whose addresses _dl_md_reloc_got() plants into the two
 * stubs at the start of DT_PLTGOT when binding lazily.  They are defined
 * outside this file (presumably assembly stubs that end up calling
 * _dl_bind() -- confirm).
 */
void _dl_bind_start_0(long, long);
void _dl_bind_start_1(long, long);
741
742static int
743_dl_md_reloc_all_plt(elf_object_t *object)
744{
745	long	i;
746	long	numrela;
747	int	fails = 0;
748	Elf_Addr loff;
749	Elf_RelA *relas;
750
751	loff = object->obj_base;
752	numrela = object->Dyn.info[DT_PLTRELSZ] / sizeof(Elf_RelA);
753	relas = (Elf_RelA *)(object->Dyn.info[DT_JMPREL]);
754
755	if (relas == NULL)
756		return 0;
757
758	for (i = 0; i < numrela; i++, relas++) {
759		Elf_Addr value;
760		Elf_Word *where;
761		struct sym_res sr;
762		const Elf_Sym *sym;
763
764		if (ELF_R_TYPE(relas->r_info) != R_TYPE(JMP_SLOT))
765			continue;
766
767		sym = object->dyn.symtab + ELF_R_SYM(relas->r_info);
768
769		sr = _dl_find_symbol(object->dyn.strtab + sym->st_name,
770		    SYM_SEARCH_ALL|SYM_WARNNOTFOUND|SYM_PLT,
771		    sym, object);
772		if (sr.sym == NULL) {
773			if (ELF_ST_BIND(sym->st_info) != STB_WEAK)
774				fails++;
775			continue;
776		}
777
778		where = (Elf_Word *)(relas->r_offset + loff);
779		value = sr.obj->obj_base + sr.sym->st_value;
780
781		if (__predict_false(relas->r_addend)) {
782			/*
783			 * This entry is >32768.  The relocation points to a
784			 * PC-relative pointer to the _dl_bind_start_0 stub at
785			 * the top of the PLT section.  Update it to point to
786			 * the target function.
787			 */
788			*(Elf_Addr *)where = relas->r_addend + value -
789			    object->Dyn.info[DT_PLTGOT];
790		} else
791			_dl_reloc_plt(&where[1], &where[2], where, value);
792	}
793
794	return fails;
795}
796
797/*
798 *	Relocate the Global Offset Table (GOT).
799 */
800int
801_dl_md_reloc_got(elf_object_t *object, int lazy)
802{
803	int	fails = 0;
804	Elf_Addr *pltgot = (Elf_Addr *)object->Dyn.info[DT_PLTGOT];
805	Elf_Word *entry = (Elf_Word *)pltgot;
806
807	if (object->Dyn.info[DT_PLTREL] != DT_RELA)
808		return 0;
809
810	if (!lazy) {
811		fails = _dl_md_reloc_all_plt(object);
812	} else {
813		_dl_install_plt(&entry[0], (Elf_Addr)&_dl_bind_start_0);
814		_dl_install_plt(&entry[8], (Elf_Addr)&_dl_bind_start_1);
815
816		pltgot[8] = (Elf_Addr)object;
817	}
818
819	return fails;
820}
821