1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2009 Matt Fleming <matt@console-pimps.org>
4 *
5 * This is an implementation of a DWARF unwinder. Its main purpose is
6 * for generating stacktrace information. Based on the DWARF 3
7 * specification from http://www.dwarfstd.org.
8 *
9 * TODO:
10 *	- DWARF64 doesn't work.
11 *	- Registers with DWARF_VAL_OFFSET rules aren't handled properly.
12 */
13
14/* #define DEBUG */
15#include <linux/kernel.h>
16#include <linux/io.h>
17#include <linux/list.h>
18#include <linux/mempool.h>
19#include <linux/mm.h>
20#include <linux/elf.h>
21#include <linux/ftrace.h>
22#include <linux/module.h>
23#include <linux/slab.h>
24#include <asm/dwarf.h>
25#include <asm/unwinder.h>
26#include <asm/sections.h>
27#include <asm/unaligned.h>
28#include <asm/stacktrace.h>
29
/* Reserve enough memory for two stack frames */
#define DWARF_FRAME_MIN_REQ	2
/* ... with 4 registers per frame. */
#define DWARF_REG_MIN_REQ	(DWARF_FRAME_MIN_REQ * 4)

/* Slab cache + mempool backing struct dwarf_frame allocations. */
static struct kmem_cache *dwarf_frame_cachep;
static mempool_t *dwarf_frame_pool;

/* Slab cache + mempool backing struct dwarf_reg allocations. */
static struct kmem_cache *dwarf_reg_cachep;
static mempool_t *dwarf_reg_pool;

/* Red-black tree of parsed CIEs, keyed by cie_pointer. */
static struct rb_root cie_root;
static DEFINE_SPINLOCK(dwarf_cie_lock);

/* Red-black tree of parsed FDEs, keyed by initial_location. */
static struct rb_root fde_root;
static DEFINE_SPINLOCK(dwarf_fde_lock);

/* Last CIE returned by dwarf_lookup_cie(); accessed under dwarf_cie_lock. */
static struct dwarf_cie *cached_cie;

/* Set to 1 once dwarf_unwinder_init() has completed successfully. */
static unsigned int dwarf_unwinder_ready;
50
51/**
52 *	dwarf_frame_alloc_reg - allocate memory for a DWARF register
53 *	@frame: the DWARF frame whose list of registers we insert on
54 *	@reg_num: the register number
55 *
56 *	Allocate space for, and initialise, a dwarf reg from
57 *	dwarf_reg_pool and insert it onto the (unsorted) linked-list of
58 *	dwarf registers for @frame.
59 *
60 *	Return the initialised DWARF reg.
61 */
62static struct dwarf_reg *dwarf_frame_alloc_reg(struct dwarf_frame *frame,
63					       unsigned int reg_num)
64{
65	struct dwarf_reg *reg;
66
67	reg = mempool_alloc(dwarf_reg_pool, GFP_ATOMIC);
68	if (!reg) {
69		printk(KERN_WARNING "Unable to allocate a DWARF register\n");
70		/*
71		 * Let's just bomb hard here, we have no way to
72		 * gracefully recover.
73		 */
74		UNWINDER_BUG();
75	}
76
77	reg->number = reg_num;
78	reg->addr = 0;
79	reg->flags = 0;
80
81	list_add(&reg->link, &frame->reg_list);
82
83	return reg;
84}
85
86static void dwarf_frame_free_regs(struct dwarf_frame *frame)
87{
88	struct dwarf_reg *reg, *n;
89
90	list_for_each_entry_safe(reg, n, &frame->reg_list, link) {
91		list_del(&reg->link);
92		mempool_free(reg, dwarf_reg_pool);
93	}
94}
95
96/**
97 *	dwarf_frame_reg - return a DWARF register
98 *	@frame: the DWARF frame to search in for @reg_num
99 *	@reg_num: the register number to search for
100 *
101 *	Lookup and return the dwarf reg @reg_num for this frame. Return
102 *	NULL if @reg_num is an register invalid number.
103 */
104static struct dwarf_reg *dwarf_frame_reg(struct dwarf_frame *frame,
105					 unsigned int reg_num)
106{
107	struct dwarf_reg *reg;
108
109	list_for_each_entry(reg, &frame->reg_list, link) {
110		if (reg->number == reg_num)
111			return reg;
112	}
113
114	return NULL;
115}
116
/**
 *	dwarf_read_addr - read dwarf data
 *	@src: source address of data
 *	@dst: destination address to store the data to
 *
 *	Read 'n' bytes from @src, where 'n' is the size of an address on
 *	the native machine. We return the number of bytes read, which
 *	should always be 'n'. We also have to be careful when reading
 *	from @src and writing to @dst, because they can be arbitrarily
 *	aligned. Return 'n' - the number of bytes read.
 */
static inline int dwarf_read_addr(unsigned long *src, unsigned long *dst)
{
	/*
	 * NOTE(review): only a u32 is copied, so this relies on
	 * sizeof(unsigned long) == 4 (true on 32-bit SH) — confirm
	 * before reusing this on a 64-bit platform.
	 */
	u32 val = get_unaligned(src);
	put_unaligned(val, dst);
	/* 4 on this architecture, matching the u32 actually copied. */
	return sizeof(unsigned long *);
}
134
/**
 *	dwarf_read_uleb128 - read unsigned LEB128 data
 *	@addr: the address where the ULEB128 data is stored
 *	@ret: address to store the result
 *
 *	Decode an unsigned LEB128 encoded datum. The algorithm is taken
 *	from Appendix C of the DWARF 3 spec. For information on the
 *	encodings refer to section "7.6 - Variable Length Data". Return
 *	the number of bytes read.
 */
static inline unsigned long dwarf_read_uleb128(char *addr, unsigned int *ret)
{
	unsigned int value = 0;
	unsigned char byte;
	int shift = 0;
	int nbytes = 0;

	/*
	 * Accumulate 7 payload bits per byte, least-significant group
	 * first, until a byte with the continuation bit (0x80) clear.
	 */
	do {
		byte = __raw_readb(addr + nbytes);
		nbytes++;

		value |= (byte & 0x7f) << shift;
		shift += 7;
	} while (byte & 0x80);

	*ret = value;

	return nbytes;
}
171
/**
 *	dwarf_read_leb128 - read signed LEB128 data
 *	@addr: the address of the LEB128 encoded data
 *	@ret: address to store the result
 *
 *	Decode signed LEB128 data. The algorithm is taken from Appendix
 *	C of the DWARF 3 spec. Return the number of bytes read.
 */
static inline unsigned long dwarf_read_leb128(char *addr, int *ret)
{
	unsigned char byte;
	int result, shift;
	int num_bits;
	int count;

	result = 0;
	shift = 0;
	count = 0;

	while (1) {
		byte = __raw_readb(addr);
		addr++;
		result |= (byte & 0x7f) << shift;
		shift += 7;
		count++;

		if (!(byte & 0x80))
			break;
	}

	/* The number of bits in a signed integer. */
	num_bits = 8 * sizeof(result);

	/*
	 * Sign-extend: if bit 6 of the final byte is set the value is
	 * negative, so fill the remaining high bits with ones. Shift
	 * an unsigned all-ones pattern here; the previous form
	 * "(-1 << shift)" left-shifted a negative value, which is
	 * undefined behaviour in C.
	 */
	if ((shift < num_bits) && (byte & 0x40))
		result |= (int)(~0U << shift);

	*ret = result;

	return count;
}
212
/**
 *	dwarf_read_encoded_value - return the decoded value at @addr
 *	@addr: the address of the encoded value
 *	@val: where to write the decoded value
 *	@encoding: the encoding with which we can decode @addr
 *
 *	GCC emits encoded address in the .eh_frame FDE entries. Decode
 *	the value at @addr using @encoding. The decoded value is written
 *	to @val and the number of bytes read is returned.
 */
static int dwarf_read_encoded_value(char *addr, unsigned long *val,
				    char encoding)
{
	unsigned long decoded_addr = 0;
	int count = 0;

	/*
	 * Bits 4-6 select the base the encoded value is relative to:
	 * absolute, or relative to the location of the value itself
	 * (pc-relative). Anything else is unsupported here.
	 */
	switch (encoding & 0x70) {
	case DW_EH_PE_absptr:
		break;
	case DW_EH_PE_pcrel:
		decoded_addr = (unsigned long)addr;
		break;
	default:
		pr_debug("encoding=0x%x\n", (encoding & 0x70));
		UNWINDER_BUG();
	}

	/* An all-zero format nibble means "native pointer": 4 bytes here. */
	if ((encoding & 0x07) == 0x00)
		encoding |= DW_EH_PE_udata4;

	/*
	 * The low nibble selects the storage format. Only 4-byte
	 * (signed or unsigned) data is handled; the result is stored
	 * into *val as a 32-bit write.
	 */
	switch (encoding & 0x0f) {
	case DW_EH_PE_sdata4:
	case DW_EH_PE_udata4:
		count += 4;
		decoded_addr += get_unaligned((u32 *)addr);
		__raw_writel(decoded_addr, val);
		break;
	default:
		pr_debug("encoding=0x%x\n", encoding);
		UNWINDER_BUG();
	}

	return count;
}
257
258/**
259 *	dwarf_entry_len - return the length of an FDE or CIE
260 *	@addr: the address of the entry
261 *	@len: the length of the entry
262 *
263 *	Read the initial_length field of the entry and store the size of
264 *	the entry in @len. We return the number of bytes read. Return a
265 *	count of 0 on error.
266 */
267static inline int dwarf_entry_len(char *addr, unsigned long *len)
268{
269	u32 initial_len;
270	int count;
271
272	initial_len = get_unaligned((u32 *)addr);
273	count = 4;
274
275	/*
276	 * An initial length field value in the range DW_LEN_EXT_LO -
277	 * DW_LEN_EXT_HI indicates an extension, and should not be
278	 * interpreted as a length. The only extension that we currently
279	 * understand is the use of DWARF64 addresses.
280	 */
281	if (initial_len >= DW_EXT_LO && initial_len <= DW_EXT_HI) {
282		/*
283		 * The 64-bit length field immediately follows the
284		 * compulsory 32-bit length field.
285		 */
286		if (initial_len == DW_EXT_DWARF64) {
287			*len = get_unaligned((u64 *)addr + 4);
288			count = 12;
289		} else {
290			printk(KERN_WARNING "Unknown DWARF extension\n");
291			count = 0;
292		}
293	} else
294		*len = initial_len;
295
296	return count;
297}
298
299/**
300 *	dwarf_lookup_cie - locate the cie
301 *	@cie_ptr: pointer to help with lookup
302 */
303static struct dwarf_cie *dwarf_lookup_cie(unsigned long cie_ptr)
304{
305	struct rb_node **rb_node = &cie_root.rb_node;
306	struct dwarf_cie *cie = NULL;
307	unsigned long flags;
308
309	spin_lock_irqsave(&dwarf_cie_lock, flags);
310
311	/*
312	 * We've cached the last CIE we looked up because chances are
313	 * that the FDE wants this CIE.
314	 */
315	if (cached_cie && cached_cie->cie_pointer == cie_ptr) {
316		cie = cached_cie;
317		goto out;
318	}
319
320	while (*rb_node) {
321		struct dwarf_cie *cie_tmp;
322
323		cie_tmp = rb_entry(*rb_node, struct dwarf_cie, node);
324		BUG_ON(!cie_tmp);
325
326		if (cie_ptr == cie_tmp->cie_pointer) {
327			cie = cie_tmp;
328			cached_cie = cie_tmp;
329			goto out;
330		} else {
331			if (cie_ptr < cie_tmp->cie_pointer)
332				rb_node = &(*rb_node)->rb_left;
333			else
334				rb_node = &(*rb_node)->rb_right;
335		}
336	}
337
338out:
339	spin_unlock_irqrestore(&dwarf_cie_lock, flags);
340	return cie;
341}
342
343/**
344 *	dwarf_lookup_fde - locate the FDE that covers pc
345 *	@pc: the program counter
346 */
347struct dwarf_fde *dwarf_lookup_fde(unsigned long pc)
348{
349	struct rb_node **rb_node = &fde_root.rb_node;
350	struct dwarf_fde *fde = NULL;
351	unsigned long flags;
352
353	spin_lock_irqsave(&dwarf_fde_lock, flags);
354
355	while (*rb_node) {
356		struct dwarf_fde *fde_tmp;
357		unsigned long tmp_start, tmp_end;
358
359		fde_tmp = rb_entry(*rb_node, struct dwarf_fde, node);
360		BUG_ON(!fde_tmp);
361
362		tmp_start = fde_tmp->initial_location;
363		tmp_end = fde_tmp->initial_location + fde_tmp->address_range;
364
365		if (pc < tmp_start) {
366			rb_node = &(*rb_node)->rb_left;
367		} else {
368			if (pc < tmp_end) {
369				fde = fde_tmp;
370				goto out;
371			} else
372				rb_node = &(*rb_node)->rb_right;
373		}
374	}
375
376out:
377	spin_unlock_irqrestore(&dwarf_fde_lock, flags);
378
379	return fde;
380}
381
382/**
383 *	dwarf_cfa_execute_insns - execute instructions to calculate a CFA
384 *	@insn_start: address of the first instruction
385 *	@insn_end: address of the last instruction
386 *	@cie: the CIE for this function
387 *	@fde: the FDE for this function
388 *	@frame: the instructions calculate the CFA for this frame
389 *	@pc: the program counter of the address we're interested in
390 *
391 *	Execute the Call Frame instruction sequence starting at
392 *	@insn_start and ending at @insn_end. The instructions describe
393 *	how to calculate the Canonical Frame Address of a stackframe.
394 *	Store the results in @frame.
395 */
396static int dwarf_cfa_execute_insns(unsigned char *insn_start,
397				   unsigned char *insn_end,
398				   struct dwarf_cie *cie,
399				   struct dwarf_fde *fde,
400				   struct dwarf_frame *frame,
401				   unsigned long pc)
402{
403	unsigned char insn;
404	unsigned char *current_insn;
405	unsigned int count, delta, reg, expr_len, offset;
406	struct dwarf_reg *regp;
407
408	current_insn = insn_start;
409
410	while (current_insn < insn_end && frame->pc <= pc) {
411		insn = __raw_readb(current_insn++);
412
413		/*
414		 * Firstly, handle the opcodes that embed their operands
415		 * in the instructions.
416		 */
417		switch (DW_CFA_opcode(insn)) {
418		case DW_CFA_advance_loc:
419			delta = DW_CFA_operand(insn);
420			delta *= cie->code_alignment_factor;
421			frame->pc += delta;
422			continue;
423			/* NOTREACHED */
424		case DW_CFA_offset:
425			reg = DW_CFA_operand(insn);
426			count = dwarf_read_uleb128(current_insn, &offset);
427			current_insn += count;
428			offset *= cie->data_alignment_factor;
429			regp = dwarf_frame_alloc_reg(frame, reg);
430			regp->addr = offset;
431			regp->flags |= DWARF_REG_OFFSET;
432			continue;
433			/* NOTREACHED */
434		case DW_CFA_restore:
435			reg = DW_CFA_operand(insn);
436			continue;
437			/* NOTREACHED */
438		}
439
440		/*
441		 * Secondly, handle the opcodes that don't embed their
442		 * operands in the instruction.
443		 */
444		switch (insn) {
445		case DW_CFA_nop:
446			continue;
447		case DW_CFA_advance_loc1:
448			delta = *current_insn++;
449			frame->pc += delta * cie->code_alignment_factor;
450			break;
451		case DW_CFA_advance_loc2:
452			delta = get_unaligned((u16 *)current_insn);
453			current_insn += 2;
454			frame->pc += delta * cie->code_alignment_factor;
455			break;
456		case DW_CFA_advance_loc4:
457			delta = get_unaligned((u32 *)current_insn);
458			current_insn += 4;
459			frame->pc += delta * cie->code_alignment_factor;
460			break;
461		case DW_CFA_offset_extended:
462			count = dwarf_read_uleb128(current_insn, &reg);
463			current_insn += count;
464			count = dwarf_read_uleb128(current_insn, &offset);
465			current_insn += count;
466			offset *= cie->data_alignment_factor;
467			break;
468		case DW_CFA_restore_extended:
469			count = dwarf_read_uleb128(current_insn, &reg);
470			current_insn += count;
471			break;
472		case DW_CFA_undefined:
473			count = dwarf_read_uleb128(current_insn, &reg);
474			current_insn += count;
475			regp = dwarf_frame_alloc_reg(frame, reg);
476			regp->flags |= DWARF_UNDEFINED;
477			break;
478		case DW_CFA_def_cfa:
479			count = dwarf_read_uleb128(current_insn,
480						   &frame->cfa_register);
481			current_insn += count;
482			count = dwarf_read_uleb128(current_insn,
483						   &frame->cfa_offset);
484			current_insn += count;
485
486			frame->flags |= DWARF_FRAME_CFA_REG_OFFSET;
487			break;
488		case DW_CFA_def_cfa_register:
489			count = dwarf_read_uleb128(current_insn,
490						   &frame->cfa_register);
491			current_insn += count;
492			frame->flags |= DWARF_FRAME_CFA_REG_OFFSET;
493			break;
494		case DW_CFA_def_cfa_offset:
495			count = dwarf_read_uleb128(current_insn, &offset);
496			current_insn += count;
497			frame->cfa_offset = offset;
498			break;
499		case DW_CFA_def_cfa_expression:
500			count = dwarf_read_uleb128(current_insn, &expr_len);
501			current_insn += count;
502
503			frame->cfa_expr = current_insn;
504			frame->cfa_expr_len = expr_len;
505			current_insn += expr_len;
506
507			frame->flags |= DWARF_FRAME_CFA_REG_EXP;
508			break;
509		case DW_CFA_offset_extended_sf:
510			count = dwarf_read_uleb128(current_insn, &reg);
511			current_insn += count;
512			count = dwarf_read_leb128(current_insn, &offset);
513			current_insn += count;
514			offset *= cie->data_alignment_factor;
515			regp = dwarf_frame_alloc_reg(frame, reg);
516			regp->flags |= DWARF_REG_OFFSET;
517			regp->addr = offset;
518			break;
519		case DW_CFA_val_offset:
520			count = dwarf_read_uleb128(current_insn, &reg);
521			current_insn += count;
522			count = dwarf_read_leb128(current_insn, &offset);
523			offset *= cie->data_alignment_factor;
524			regp = dwarf_frame_alloc_reg(frame, reg);
525			regp->flags |= DWARF_VAL_OFFSET;
526			regp->addr = offset;
527			break;
528		case DW_CFA_GNU_args_size:
529			count = dwarf_read_uleb128(current_insn, &offset);
530			current_insn += count;
531			break;
532		case DW_CFA_GNU_negative_offset_extended:
533			count = dwarf_read_uleb128(current_insn, &reg);
534			current_insn += count;
535			count = dwarf_read_uleb128(current_insn, &offset);
536			offset *= cie->data_alignment_factor;
537
538			regp = dwarf_frame_alloc_reg(frame, reg);
539			regp->flags |= DWARF_REG_OFFSET;
540			regp->addr = -offset;
541			break;
542		default:
543			pr_debug("unhandled DWARF instruction 0x%x\n", insn);
544			UNWINDER_BUG();
545			break;
546		}
547	}
548
549	return 0;
550}
551
/**
 *	dwarf_free_frame - free the memory allocated for @frame
 *	@frame: the frame to free
 */
void dwarf_free_frame(struct dwarf_frame *frame)
{
	/* Release the register rules first, then the frame itself. */
	dwarf_frame_free_regs(frame);
	mempool_free(frame, dwarf_frame_pool);
}
561
/* IRQ return trampoline; used below to stop unwinding across interrupts. */
extern void ret_from_irq(void);

/**
 *	dwarf_unwind_stack - unwind the stack
 *
 *	@pc: address of the function to unwind
 *	@prev: struct dwarf_frame of the previous stackframe on the callstack
 *
 *	Return a struct dwarf_frame representing the most recent frame
 *	on the callstack. Each of the lower (older) stack frames are
 *	linked via the "prev" member.
 *
 *	Returns NULL when the unwinder is not yet initialised, when no
 *	FDE covers @pc, or when the return-address rule is missing or
 *	undefined (end of callstack). The caller owns the returned
 *	frame and must release it with dwarf_free_frame().
 */
struct dwarf_frame *dwarf_unwind_stack(unsigned long pc,
				       struct dwarf_frame *prev)
{
	struct dwarf_frame *frame;
	struct dwarf_cie *cie;
	struct dwarf_fde *fde;
	struct dwarf_reg *reg;
	unsigned long addr;

	/*
	 * If we've been called in to before initialization has
	 * completed, bail out immediately.
	 */
	if (!dwarf_unwinder_ready)
		return NULL;

	/*
	 * If we're starting at the top of the stack we need get the
	 * contents of a physical register to get the CFA in order to
	 * begin the virtual unwinding of the stack.
	 *
	 * NOTE: the return address is guaranteed to be setup by the
	 * time this function makes its first function call.
	 */
	if (!pc || !prev)
		pc = _THIS_IP_;

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	/*
	 * If our stack has been patched by the function graph tracer
	 * then we might see the address of return_to_handler() where we
	 * expected to find the real return address.
	 */
	if (pc == (unsigned long)&return_to_handler) {
		struct ftrace_ret_stack *ret_stack;

		ret_stack = ftrace_graph_get_ret_stack(current, 0);
		if (ret_stack)
			pc = ret_stack->ret;
		/*
		 * We currently have no way of tracking how many
		 * return_to_handler()'s we've seen. If there is more
		 * than one patched return address on our stack,
		 * complain loudly.
		 */
		WARN_ON(ftrace_graph_get_ret_stack(current, 1));
	}
#endif

	frame = mempool_alloc(dwarf_frame_pool, GFP_ATOMIC);
	if (!frame) {
		printk(KERN_ERR "Unable to allocate a dwarf frame\n");
		UNWINDER_BUG();
	}

	/* Start with an empty register-rule list and no computed state. */
	INIT_LIST_HEAD(&frame->reg_list);
	frame->flags = 0;
	frame->prev = prev;
	frame->return_addr = 0;

	fde = dwarf_lookup_fde(pc);
	if (!fde) {
		/*
		 * This is our normal exit path. There are two reasons
		 * why we might exit here,
		 *
		 *	a) pc has no associated DWARF frame info and so
		 *	we don't know how to unwind this frame. This is
		 *	usually the case when we're trying to unwind a
		 *	frame that was called from some assembly code
		 *	that has no DWARF info, e.g. syscalls.
		 *
		 *	b) the DEBUG info for pc is bogus. There's
		 *	really no way to distinguish this case from the
		 *	case above, which sucks because we could print a
		 *	warning here.
		 */
		goto bail;
	}

	/*
	 * NOTE(review): dwarf_lookup_cie() can return NULL but the
	 * result is dereferenced (via cie->...) inside
	 * dwarf_cfa_execute_insns() without a check — confirm a CIE
	 * always exists for a parsed FDE.
	 */
	cie = dwarf_lookup_cie(fde->cie_pointer);

	/* Interpret the CFA program from the function's entry up to pc. */
	frame->pc = fde->initial_location;

	/* CIE initial instructions */
	dwarf_cfa_execute_insns(cie->initial_instructions,
				cie->instructions_end, cie, fde,
				frame, pc);

	/* FDE instructions */
	dwarf_cfa_execute_insns(fde->instructions, fde->end, cie,
				fde, frame, pc);

	/* Calculate the CFA */
	switch (frame->flags) {
	case DWARF_FRAME_CFA_REG_OFFSET:
		if (prev) {
			/* The CFA register's value is stored in the previous frame. */
			reg = dwarf_frame_reg(prev, frame->cfa_register);
			UNWINDER_BUG_ON(!reg);
			UNWINDER_BUG_ON(reg->flags != DWARF_REG_OFFSET);

			addr = prev->cfa + reg->addr;
			frame->cfa = __raw_readl(addr);

		} else {
			/*
			 * Again, we're starting from the top of the
			 * stack. We need to physically read
			 * the contents of a register in order to get
			 * the Canonical Frame Address for this
			 * function.
			 */
			frame->cfa = dwarf_read_arch_reg(frame->cfa_register);
		}

		frame->cfa += frame->cfa_offset;
		break;
	default:
		/* DW_CFA_def_cfa_expression (CFA_REG_EXP) is not supported. */
		UNWINDER_BUG();
	}

	reg = dwarf_frame_reg(frame, DWARF_ARCH_RA_REG);

	/*
	 * If we haven't seen the return address register or the return
	 * address column is undefined then we must assume that this is
	 * the end of the callstack.
	 */
	if (!reg || reg->flags == DWARF_UNDEFINED)
		goto bail;

	UNWINDER_BUG_ON(reg->flags != DWARF_REG_OFFSET);

	/* The saved return address lives at CFA + rule offset. */
	addr = frame->cfa + reg->addr;
	frame->return_addr = __raw_readl(addr);

	/*
	 * Ah, the joys of unwinding through interrupts.
	 *
	 * Interrupts are tricky - the DWARF info needs to be _really_
	 * accurate and unfortunately I'm seeing a lot of bogus DWARF
	 * info. For example, I've seen interrupts occur in epilogues
	 * just after the frame pointer (r14) had been restored. The
	 * problem was that the DWARF info claimed that the CFA could be
	 * reached by using the value of the frame pointer before it was
	 * restored.
	 *
	 * So until the compiler can be trusted to produce reliable
	 * DWARF info when it really matters, let's stop unwinding once
	 * we've calculated the function that was interrupted.
	 */
	if (prev && prev->pc == (unsigned long)ret_from_irq)
		frame->return_addr = 0;

	return frame;

bail:
	dwarf_free_frame(frame);
	return NULL;
}
734
/*
 * dwarf_parse_cie - parse one CIE from .eh_frame and insert it into
 * the global CIE rbtree (and, for modules, onto the module's list).
 *
 * @entry: address of the CIE entry (used as its lookup key)
 * @p:     first byte after the entry-type field
 * @len:   entry length as read from the initial length field
 * @end:   one past the last byte of the entry
 * @mod:   owning module, or NULL for the kernel image
 *
 * Returns 0 on success or -ENOMEM.
 */
static int dwarf_parse_cie(void *entry, void *p, unsigned long len,
			   unsigned char *end, struct module *mod)
{
	struct rb_node **rb_node = &cie_root.rb_node;
	struct rb_node *parent = *rb_node;
	struct dwarf_cie *cie;
	unsigned long flags;
	int count;

	cie = kzalloc(sizeof(*cie), GFP_KERNEL);
	if (!cie)
		return -ENOMEM;

	cie->length = len;

	/*
	 * Record the offset into the .eh_frame section
	 * for this CIE. It allows this CIE to be
	 * quickly and easily looked up from the
	 * corresponding FDE.
	 */
	cie->cie_pointer = (unsigned long)entry;

	/* Only CIE version 1 is supported. */
	cie->version = *(char *)p++;
	UNWINDER_BUG_ON(cie->version != 1);

	/* NUL-terminated augmentation string, e.g. "zR". */
	cie->augmentation = p;
	p += strlen(cie->augmentation) + 1;

	count = dwarf_read_uleb128(p, &cie->code_alignment_factor);
	p += count;

	count = dwarf_read_leb128(p, &cie->data_alignment_factor);
	p += count;

	/*
	 * Which column in the rule table contains the
	 * return address?
	 */
	if (cie->version == 1) {
		cie->return_address_reg = __raw_readb(p);
		p++;
	} else {
		/* NOTE(review): unreachable — version 1 is enforced above. */
		count = dwarf_read_uleb128(p, &cie->return_address_reg);
		p += count;
	}

	if (cie->augmentation[0] == 'z') {
		/* NOTE(review): this 'count' shadows the outer one. */
		unsigned int length, count;
		cie->flags |= DWARF_CIE_Z_AUGMENTATION;

		/* 'z': augmentation data length follows as a ULEB128. */
		count = dwarf_read_uleb128(p, &length);
		p += count;

		UNWINDER_BUG_ON((unsigned char *)p > end);

		/* Tentative; overwritten below unless parsing bails out. */
		cie->initial_instructions = p + length;
		cie->augmentation++;
	}

	while (*cie->augmentation) {
		/*
		 * "L" indicates a byte showing how the
		 * LSDA pointer is encoded. Skip it.
		 */
		if (*cie->augmentation == 'L') {
			p++;
			cie->augmentation++;
		} else if (*cie->augmentation == 'R') {
			/*
			 * "R" indicates a byte showing
			 * how FDE addresses are
			 * encoded.
			 */
			cie->encoding = *(char *)p++;
			cie->augmentation++;
		} else if (*cie->augmentation == 'P') {
			/*
			 * "P" indicates a personality
			 * routine in the CIE
			 * augmentation. Unsupported.
			 */
			UNWINDER_BUG();
		} else if (*cie->augmentation == 'S') {
			UNWINDER_BUG();
		} else {
			/*
			 * Unknown augmentation. Assume
			 * 'z' augmentation.
			 */
			p = cie->initial_instructions;
			UNWINDER_BUG_ON(!p);
			break;
		}
	}

	cie->initial_instructions = p;
	cie->instructions_end = end;

	/* Add to list */
	spin_lock_irqsave(&dwarf_cie_lock, flags);

	while (*rb_node) {
		struct dwarf_cie *cie_tmp;

		cie_tmp = rb_entry(*rb_node, struct dwarf_cie, node);

		parent = *rb_node;

		/*
		 * NOTE(review): the "<" / ">=" pair covers every case,
		 * so the WARN_ON(1) branch below is unreachable and
		 * duplicate keys silently go right.
		 */
		if (cie->cie_pointer < cie_tmp->cie_pointer)
			rb_node = &parent->rb_left;
		else if (cie->cie_pointer >= cie_tmp->cie_pointer)
			rb_node = &parent->rb_right;
		else
			WARN_ON(1);
	}

	rb_link_node(&cie->node, parent, rb_node);
	rb_insert_color(&cie->node, &cie_root);

#ifdef CONFIG_MODULES
	if (mod != NULL)
		list_add_tail(&cie->link, &mod->arch.cie_list);
#endif

	spin_unlock_irqrestore(&dwarf_cie_lock, flags);

	return 0;
}
864
/*
 * dwarf_parse_fde - parse one FDE from .eh_frame and insert it into
 * the global FDE rbtree (and, for modules, onto the module's list).
 *
 * @entry:      address of the FDE entry
 * @entry_type: the entry's CIE-pointer field (delta back to its CIE)
 * @start:      first byte after the entry-type field
 * @len:        entry length from the initial length field
 * @end:        one past the last byte of the entry
 * @mod:        owning module, or NULL for the kernel image
 *
 * Returns 0 on success or -ENOMEM.
 */
static int dwarf_parse_fde(void *entry, u32 entry_type,
			   void *start, unsigned long len,
			   unsigned char *end, struct module *mod)
{
	struct rb_node **rb_node = &fde_root.rb_node;
	struct rb_node *parent = *rb_node;
	struct dwarf_fde *fde;
	struct dwarf_cie *cie;
	unsigned long flags;
	int count;
	void *p = start;

	fde = kzalloc(sizeof(*fde), GFP_KERNEL);
	if (!fde)
		return -ENOMEM;

	fde->length = len;

	/*
	 * In a .eh_frame section the CIE pointer is the
	 * delta between the address within the FDE. Subtracting
	 * entry_type (the delta) and 4 (the size of the field itself)
	 * from @p yields the address of the CIE this FDE refers to,
	 * matching the cie_pointer key recorded in dwarf_parse_cie().
	 */
	fde->cie_pointer = (unsigned long)(p - entry_type - 4);

	/*
	 * NOTE(review): no NULL check — cie is dereferenced just
	 * below; confirm the CIE has always been parsed before its
	 * FDEs.
	 */
	cie = dwarf_lookup_cie(fde->cie_pointer);
	fde->cie = cie;

	/* Function start address, decoded per the CIE's 'R' encoding. */
	if (cie->encoding)
		count = dwarf_read_encoded_value(p, &fde->initial_location,
						 cie->encoding);
	else
		count = dwarf_read_addr(p, &fde->initial_location);

	p += count;

	/* Byte length of the covered code range (format nibble only). */
	if (cie->encoding)
		count = dwarf_read_encoded_value(p, &fde->address_range,
						 cie->encoding & 0x0f);
	else
		count = dwarf_read_addr(p, &fde->address_range);

	p += count;

	/* 'z' augmentation: skip the augmentation data block. */
	if (fde->cie->flags & DWARF_CIE_Z_AUGMENTATION) {
		unsigned int length;
		count = dwarf_read_uleb128(p, &length);
		p += count + length;
	}

	/* Call frame instructions. */
	fde->instructions = p;
	fde->end = end;

	/* Add to list. */
	spin_lock_irqsave(&dwarf_fde_lock, flags);

	/* Insert keyed on the [start, end) code range the FDE covers. */
	while (*rb_node) {
		struct dwarf_fde *fde_tmp;
		unsigned long tmp_start, tmp_end;
		unsigned long start, end;

		fde_tmp = rb_entry(*rb_node, struct dwarf_fde, node);

		start = fde->initial_location;
		end = fde->initial_location + fde->address_range;

		tmp_start = fde_tmp->initial_location;
		tmp_end = fde_tmp->initial_location + fde_tmp->address_range;

		parent = *rb_node;

		if (start < tmp_start)
			rb_node = &parent->rb_left;
		else if (start >= tmp_end)
			rb_node = &parent->rb_right;
		else
			/* Overlapping ranges indicate bogus DWARF info. */
			WARN_ON(1);
	}

	rb_link_node(&fde->node, parent, rb_node);
	rb_insert_color(&fde->node, &fde_root);

#ifdef CONFIG_MODULES
	if (mod != NULL)
		list_add_tail(&fde->link, &mod->arch.fde_list);
#endif

	spin_unlock_irqrestore(&dwarf_fde_lock, flags);

	return 0;
}
956
957static void dwarf_unwinder_dump(struct task_struct *task,
958				struct pt_regs *regs,
959				unsigned long *sp,
960				const struct stacktrace_ops *ops,
961				void *data)
962{
963	struct dwarf_frame *frame, *_frame;
964	unsigned long return_addr;
965
966	_frame = NULL;
967	return_addr = 0;
968
969	while (1) {
970		frame = dwarf_unwind_stack(return_addr, _frame);
971
972		if (_frame)
973			dwarf_free_frame(_frame);
974
975		_frame = frame;
976
977		if (!frame || !frame->return_addr)
978			break;
979
980		return_addr = frame->return_addr;
981		ops->address(data, return_addr, 1);
982	}
983
984	if (frame)
985		dwarf_free_frame(frame);
986}
987
/* Registered with the unwinder core in dwarf_unwinder_init() below. */
static struct unwinder dwarf_unwinder = {
	.name = "dwarf-unwinder",
	.dump = dwarf_unwinder_dump,
	.rating = 150,
};
993
static void __init dwarf_unwinder_cleanup(void)
{
	struct dwarf_fde *fde, *next_fde;
	struct dwarf_cie *cie, *next_cie;

	/*
	 * Deallocate all the memory allocated for the DWARF unwinder.
	 * Traverse all the FDE/CIE lists and remove and free all the
	 * memory associated with those data structures.
	 */
	rbtree_postorder_for_each_entry_safe(fde, next_fde, &fde_root, node)
		kfree(fde);

	rbtree_postorder_for_each_entry_safe(cie, next_cie, &cie_root, node)
		kfree(cie);

	/* Pools must go before the caches that back them. */
	mempool_destroy(dwarf_reg_pool);
	mempool_destroy(dwarf_frame_pool);
	kmem_cache_destroy(dwarf_reg_cachep);
	kmem_cache_destroy(dwarf_frame_cachep);
}
1015
/**
 *	dwarf_parse_section - parse DWARF section
 *	@eh_frame_start: start address of the .eh_frame section
 *	@eh_frame_end: end address of the .eh_frame section
 *	@mod: the kernel module containing the .eh_frame section
 *
 *	Parse the information in a .eh_frame section. Every entry is
 *	handed to dwarf_parse_cie() or dwarf_parse_fde() depending on
 *	its entry-type field. Returns 0 on success or a negative error
 *	code from the first entry that fails to parse.
 */
static int dwarf_parse_section(char *eh_frame_start, char *eh_frame_end,
			       struct module *mod)
{
	u32 entry_type;
	void *p, *entry;
	int count, err = 0;
	unsigned long len = 0;
	unsigned int c_entries, f_entries;
	unsigned char *end;

	c_entries = 0;
	f_entries = 0;
	entry = eh_frame_start;

	while ((char *)entry < eh_frame_end) {
		p = entry;

		count = dwarf_entry_len(p, &len);
		if (count == 0) {
			/*
			 * We read a bogus length field value. There is
			 * nothing we can do here apart from disabling
			 * the DWARF unwinder. We can't even skip this
			 * entry and move to the next one because 'len'
			 * tells us where our next entry is.
			 */
			err = -EINVAL;
			goto out;
		} else
			p += count;

		/* initial length does not include itself */
		end = p + len;

		/* Zero marks a CIE; anything else is an FDE's CIE pointer. */
		entry_type = get_unaligned((u32 *)p);
		p += 4;

		if (entry_type == DW_EH_FRAME_CIE) {
			err = dwarf_parse_cie(entry, p, len, end, mod);
			if (err < 0)
				goto out;
			else
				c_entries++;
		} else {
			err = dwarf_parse_fde(entry, entry_type, p, len,
					      end, mod);
			if (err < 0)
				goto out;
			else
				f_entries++;
		}

		/* Advance by the entry length plus the 4-byte length field. */
		entry = (char *)entry + len + 4;
	}

	printk(KERN_INFO "DWARF unwinder initialised: read %u CIEs, %u FDEs\n",
	       c_entries, f_entries);

	return 0;

out:
	return err;
}
1087
1088#ifdef CONFIG_MODULES
1089int module_dwarf_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
1090			  struct module *me)
1091{
1092	unsigned int i, err;
1093	unsigned long start, end;
1094	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
1095
1096	start = end = 0;
1097
1098	for (i = 1; i < hdr->e_shnum; i++) {
1099		/* Alloc bit cleared means "ignore it." */
1100		if ((sechdrs[i].sh_flags & SHF_ALLOC)
1101		    && !strcmp(secstrings+sechdrs[i].sh_name, ".eh_frame")) {
1102			start = sechdrs[i].sh_addr;
1103			end = start + sechdrs[i].sh_size;
1104			break;
1105		}
1106	}
1107
1108	/* Did we find the .eh_frame section? */
1109	if (i != hdr->e_shnum) {
1110		INIT_LIST_HEAD(&me->arch.cie_list);
1111		INIT_LIST_HEAD(&me->arch.fde_list);
1112		err = dwarf_parse_section((char *)start, (char *)end, me);
1113		if (err) {
1114			printk(KERN_WARNING "%s: failed to parse DWARF info\n",
1115			       me->name);
1116			return err;
1117		}
1118	}
1119
1120	return 0;
1121}
1122
1123/**
1124 *	module_dwarf_cleanup - remove FDE/CIEs associated with @mod
1125 *	@mod: the module that is being unloaded
1126 *
1127 *	Remove any FDEs and CIEs from the global lists that came from
1128 *	@mod's .eh_frame section because @mod is being unloaded.
1129 */
1130void module_dwarf_cleanup(struct module *mod)
1131{
1132	struct dwarf_fde *fde, *ftmp;
1133	struct dwarf_cie *cie, *ctmp;
1134	unsigned long flags;
1135
1136	spin_lock_irqsave(&dwarf_cie_lock, flags);
1137
1138	list_for_each_entry_safe(cie, ctmp, &mod->arch.cie_list, link) {
1139		list_del(&cie->link);
1140		rb_erase(&cie->node, &cie_root);
1141		kfree(cie);
1142	}
1143
1144	spin_unlock_irqrestore(&dwarf_cie_lock, flags);
1145
1146	spin_lock_irqsave(&dwarf_fde_lock, flags);
1147
1148	list_for_each_entry_safe(fde, ftmp, &mod->arch.fde_list, link) {
1149		list_del(&fde->link);
1150		rb_erase(&fde->node, &fde_root);
1151		kfree(fde);
1152	}
1153
1154	spin_unlock_irqrestore(&dwarf_fde_lock, flags);
1155}
1156#endif /* CONFIG_MODULES */
1157
/**
 *	dwarf_unwinder_init - initialise the dwarf unwinder
 *
 *	Build the data structures describing the .dwarf_frame section to
 *	make it easier to lookup CIE and FDE entries. Because the
 *	.eh_frame section is packed as tightly as possible it is not
 *	easy to lookup the FDE for a given PC, so we build a list of FDE
 *	and CIE entries that make it easier.
 */
static int __init dwarf_unwinder_init(void)
{
	int err = -ENOMEM;

	/* SLAB_PANIC: cache creation failure panics, so no NULL checks. */
	dwarf_frame_cachep = kmem_cache_create("dwarf_frames",
			sizeof(struct dwarf_frame), 0,
			SLAB_PANIC | SLAB_HWCACHE_ALIGN, NULL);

	dwarf_reg_cachep = kmem_cache_create("dwarf_regs",
			sizeof(struct dwarf_reg), 0,
			SLAB_PANIC | SLAB_HWCACHE_ALIGN, NULL);

	/* Pre-size the pools so unwinding can proceed under GFP_ATOMIC. */
	dwarf_frame_pool = mempool_create_slab_pool(DWARF_FRAME_MIN_REQ,
						    dwarf_frame_cachep);
	if (!dwarf_frame_pool)
		goto out;

	dwarf_reg_pool = mempool_create_slab_pool(DWARF_REG_MIN_REQ,
						  dwarf_reg_cachep);
	if (!dwarf_reg_pool)
		goto out;

	/* Index the kernel image's own .eh_frame section. */
	err = dwarf_parse_section(__start_eh_frame, __stop_eh_frame, NULL);
	if (err)
		goto out;

	err = unwinder_register(&dwarf_unwinder);
	if (err)
		goto out;

	/* Only now may dwarf_unwind_stack() start doing real work. */
	dwarf_unwinder_ready = 1;

	return 0;

out:
	printk(KERN_ERR "Failed to initialise DWARF unwinder: %d\n", err);
	dwarf_unwinder_cleanup();
	return err;
}
early_initcall(dwarf_unwinder_init);
1207