#
4708ea14 |
|
01-Mar-2023 |
Josh Poimboeuf <jpoimboe@kernel.org> |
x86,objtool: Separate unret validation from unwind hints The ENTRY unwind hint type is serving double duty as both an empty unwind hint and an unret validation annotation. Unret validation is unrelated to unwinding. Separate it out into its own annotation. Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lore.kernel.org/r/ff7448d492ea21b86d8a90264b105fbd0d751077.1677683419.git.jpoimboe@kernel.org
|
#
1c34496e |
|
08-Feb-2023 |
Peter Zijlstra <peterz@infradead.org> |
objtool: Remove instruction::list Replace the instruction::list by allocating instructions in arrays of 256 entries and stringing them together by (amortized) find_insn(). This shrinks instruction by 16 bytes and brings it down to 128. struct instruction { - struct list_head list; /* 0 16 */ - struct hlist_node hash; /* 16 16 */ - struct list_head call_node; /* 32 16 */ - struct section * sec; /* 48 8 */ - long unsigned int offset; /* 56 8 */ - /* --- cacheline 1 boundary (64 bytes) --- */ - long unsigned int immediate; /* 64 8 */ - unsigned int len; /* 72 4 */ - u8 type; /* 76 1 */ - - /* Bitfield combined with previous fields */ + struct hlist_node hash; /* 0 16 */ + struct list_head call_node; /* 16 16 */ + struct section * sec; /* 32 8 */ + long unsigned int offset; /* 40 8 */ + long unsigned int immediate; /* 48 8 */ + u8 len; /* 56 1 */ + u8 prev_len; /* 57 1 */ + u8 type; /* 58 1 */ + s8 instr; /* 59 1 */ + u32 idx:8; /* 60: 0 4 */ + u32 dead_end:1; /* 60: 8 4 */ + u32 ignore:1; /* 60: 9 4 */ + u32 ignore_alts:1; /* 60:10 4 */ + u32 hint:1; /* 60:11 4 */ + u32 save:1; /* 60:12 4 */ + u32 restore:1; /* 60:13 4 */ + u32 retpoline_safe:1; /* 60:14 4 */ + u32 noendbr:1; /* 60:15 4 */ + u32 entry:1; /* 60:16 4 */ + u32 visited:4; /* 60:17 4 */ + u32 no_reloc:1; /* 60:21 4 */ - u16 dead_end:1; /* 76: 8 2 */ - u16 ignore:1; /* 76: 9 2 */ - u16 ignore_alts:1; /* 76:10 2 */ - u16 hint:1; /* 76:11 2 */ - u16 save:1; /* 76:12 2 */ - u16 restore:1; /* 76:13 2 */ - u16 retpoline_safe:1; /* 76:14 2 */ - u16 noendbr:1; /* 76:15 2 */ - u16 entry:1; /* 78: 0 2 */ - u16 visited:4; /* 78: 1 2 */ - u16 no_reloc:1; /* 78: 5 2 */ + /* XXX 10 bits hole, try to pack */ - /* XXX 2 bits hole, try to pack */ - /* Bitfield combined with next fields */ - - s8 instr; /* 79 1 */ - struct alt_group * alt_group; /* 80 8 */ - struct instruction * jump_dest; /* 88 8 */ - struct instruction * first_jump_src; /* 96 8 */ + /* --- cacheline 1 boundary (64 bytes) --- */ + struct alt_group * alt_group; /* 64 8 */ + struct instruction * jump_dest; /* 72 8 */ + struct instruction * first_jump_src; /* 80 8 */ union { - struct symbol * _call_dest; /* 104 8 */ - struct reloc * _jump_table; /* 104 8 */ - }; /* 104 8 */ - struct alternative * alts; /* 112 8 */ - struct symbol * sym; /* 120 8 */ - /* --- cacheline 2 boundary (128 bytes) --- */ - struct stack_op * stack_ops; /* 128 8 */ - struct cfi_state * cfi; /* 136 8 */ + struct symbol * _call_dest; /* 88 8 */ + struct reloc * _jump_table; /* 88 8 */ + }; /* 88 8 */ + struct alternative * alts; /* 96 8 */ + struct symbol * sym; /* 104 8 */ + struct stack_op * stack_ops; /* 112 8 */ + struct cfi_state * cfi; /* 120 8 */ - /* size: 144, cachelines: 3, members: 28 */ - /* sum members: 142 */ - /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */ - /* last cacheline: 16 bytes */ + /* size: 128, cachelines: 2, members: 29 */ + /* sum members: 124 */ + /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */ }; pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Ingo Molnar <mingo@kernel.org> Acked-by: Josh Poimboeuf <jpoimboe@kernel.org> Tested-by: Nathan Chancellor <nathan@kernel.org> # build only Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
|
#
c6f5dc28 |
|
08-Feb-2023 |
Peter Zijlstra <peterz@infradead.org> |
objtool: Union instruction::{call_dest,jump_table} The instruction call_dest and jump_table members can never be used at the same time, their usage depends on type. struct instruction { struct list_head list; /* 0 16 */ struct hlist_node hash; /* 16 16 */ struct list_head call_node; /* 32 16 */ struct section * sec; /* 48 8 */ long unsigned int offset; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ long unsigned int immediate; /* 64 8 */ unsigned int len; /* 72 4 */ u8 type; /* 76 1 */ /* Bitfield combined with previous fields */ u16 dead_end:1; /* 76: 8 2 */ u16 ignore:1; /* 76: 9 2 */ u16 ignore_alts:1; /* 76:10 2 */ u16 hint:1; /* 76:11 2 */ u16 save:1; /* 76:12 2 */ u16 restore:1; /* 76:13 2 */ u16 retpoline_safe:1; /* 76:14 2 */ u16 noendbr:1; /* 76:15 2 */ u16 entry:1; /* 78: 0 2 */ u16 visited:4; /* 78: 1 2 */ u16 no_reloc:1; /* 78: 5 2 */ /* XXX 2 bits hole, try to pack */ /* Bitfield combined with next fields */ s8 instr; /* 79 1 */ struct alt_group * alt_group; /* 80 8 */ - struct symbol * call_dest; /* 88 8 */ - struct instruction * jump_dest; /* 96 8 */ - struct instruction * first_jump_src; /* 104 8 */ - struct reloc * jump_table; /* 112 8 */ - struct alternative * alts; /* 120 8 */ + struct instruction * jump_dest; /* 88 8 */ + struct instruction * first_jump_src; /* 96 8 */ + union { + struct symbol * _call_dest; /* 104 8 */ + struct reloc * _jump_table; /* 104 8 */ + }; /* 104 8 */ + struct alternative * alts; /* 112 8 */ + struct symbol * sym; /* 120 8 */ /* --- cacheline 2 boundary (128 bytes) --- */ - struct symbol * sym; /* 128 8 */ - struct stack_op * stack_ops; /* 136 8 */ - struct cfi_state * cfi; /* 144 8 */ + struct stack_op * stack_ops; /* 128 8 */ + struct cfi_state * cfi; /* 136 8 */ - /* size: 152, cachelines: 3, members: 29 */ - /* sum members: 150 */ + /* size: 144, cachelines: 3, members: 28 */ + /* sum members: 142 */ /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */ - /* last cacheline: 24 bytes */ + /* last cacheline: 16 bytes */ }; pre: 5:39.35 real, 215.58 user, 123.69 sys, 23448736 mem post: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Ingo Molnar <mingo@kernel.org> Acked-by: Josh Poimboeuf <jpoimboe@kernel.org> Tested-by: Nathan Chancellor <nathan@kernel.org> # build only Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run Link: https://lore.kernel.org/r/20230208172245.640914454@infradead.org
|
#
0932dbe1 |
|
08-Feb-2023 |
Peter Zijlstra <peterz@infradead.org> |
objtool: Remove instruction::reloc Instead of caching the reloc for each instruction, only keep a negative cache of not having a reloc (by far the most common case). struct instruction { struct list_head list; /* 0 16 */ struct hlist_node hash; /* 16 16 */ struct list_head call_node; /* 32 16 */ struct section * sec; /* 48 8 */ long unsigned int offset; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ long unsigned int immediate; /* 64 8 */ unsigned int len; /* 72 4 */ u8 type; /* 76 1 */ /* Bitfield combined with previous fields */ u16 dead_end:1; /* 76: 8 2 */ u16 ignore:1; /* 76: 9 2 */ u16 ignore_alts:1; /* 76:10 2 */ u16 hint:1; /* 76:11 2 */ u16 save:1; /* 76:12 2 */ u16 restore:1; /* 76:13 2 */ u16 retpoline_safe:1; /* 76:14 2 */ u16 noendbr:1; /* 76:15 2 */ u16 entry:1; /* 78: 0 2 */ u16 visited:4; /* 78: 1 2 */ + u16 no_reloc:1; /* 78: 5 2 */ - /* XXX 3 bits hole, try to pack */ + /* XXX 2 bits hole, try to pack */ /* Bitfield combined with next fields */ s8 instr; /* 79 1 */ struct alt_group * alt_group; /* 80 8 */ struct symbol * call_dest; /* 88 8 */ struct instruction * jump_dest; /* 96 8 */ struct instruction * first_jump_src; /* 104 8 */ struct reloc * jump_table; /* 112 8 */ - struct reloc * reloc; /* 120 8 */ + struct alternative * alts; /* 120 8 */ /* --- cacheline 2 boundary (128 bytes) --- */ - struct alternative * alts; /* 128 8 */ - struct symbol * sym; /* 136 8 */ - struct stack_op * stack_ops; /* 144 8 */ - struct cfi_state * cfi; /* 152 8 */ + struct symbol * sym; /* 128 8 */ + struct stack_op * stack_ops; /* 136 8 */ + struct cfi_state * cfi; /* 144 8 */ - /* size: 160, cachelines: 3, members: 29 */ - /* sum members: 158 */ - /* sum bitfield members: 13 bits, bit holes: 1, sum bit holes: 3 bits */ - /* last cacheline: 32 bytes */ + /* size: 152, cachelines: 3, members: 29 */ + /* sum members: 150 */ + /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */ + /* last cacheline: 24 bytes */ }; pre: 5:48.89 real, 220.96 user, 127.55 sys, 24834672 mem post: 5:39.35 real, 215.58 user, 123.69 sys, 23448736 mem Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Ingo Molnar <mingo@kernel.org> Acked-by: Josh Poimboeuf <jpoimboe@kernel.org> Tested-by: Nathan Chancellor <nathan@kernel.org> # build only Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run Link: https://lore.kernel.org/r/20230208172245.572145269@infradead.org
|
#
8b2de412 |
|
08-Feb-2023 |
Peter Zijlstra <peterz@infradead.org> |
objtool: Shrink instruction::{type,visited} Since we don't have that many types in enum insn_type, force it into a u8 and re-arrange member to get rid of the holes, saves another 8 bytes. struct instruction { struct list_head list; /* 0 16 */ struct hlist_node hash; /* 16 16 */ struct list_head call_node; /* 32 16 */ struct section * sec; /* 48 8 */ long unsigned int offset; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ - unsigned int len; /* 64 4 */ - enum insn_type type; /* 68 4 */ - long unsigned int immediate; /* 72 8 */ - u16 dead_end:1; /* 80: 0 2 */ - u16 ignore:1; /* 80: 1 2 */ - u16 ignore_alts:1; /* 80: 2 2 */ - u16 hint:1; /* 80: 3 2 */ - u16 save:1; /* 80: 4 2 */ - u16 restore:1; /* 80: 5 2 */ - u16 retpoline_safe:1; /* 80: 6 2 */ - u16 noendbr:1; /* 80: 7 2 */ - u16 entry:1; /* 80: 8 2 */ + long unsigned int immediate; /* 64 8 */ + unsigned int len; /* 72 4 */ + u8 type; /* 76 1 */ - /* XXX 7 bits hole, try to pack */ + /* Bitfield combined with previous fields */ - s8 instr; /* 82 1 */ - u8 visited; /* 83 1 */ + u16 dead_end:1; /* 76: 8 2 */ + u16 ignore:1; /* 76: 9 2 */ + u16 ignore_alts:1; /* 76:10 2 */ + u16 hint:1; /* 76:11 2 */ + u16 save:1; /* 76:12 2 */ + u16 restore:1; /* 76:13 2 */ + u16 retpoline_safe:1; /* 76:14 2 */ + u16 noendbr:1; /* 76:15 2 */ + u16 entry:1; /* 78: 0 2 */ + u16 visited:4; /* 78: 1 2 */ - /* XXX 4 bytes hole, try to pack */ + /* XXX 3 bits hole, try to pack */ + /* Bitfield combined with next fields */ - struct alt_group * alt_group; /* 88 8 */ - struct symbol * call_dest; /* 96 8 */ - struct instruction * jump_dest; /* 104 8 */ - struct instruction * first_jump_src; /* 112 8 */ - struct reloc * jump_table; /* 120 8 */ + s8 instr; /* 79 1 */ + struct alt_group * alt_group; /* 80 8 */ + struct symbol * call_dest; /* 88 8 */ + struct instruction * jump_dest; /* 96 8 */ + struct instruction * first_jump_src; /* 104 8 */ + struct reloc * jump_table; /* 112 8 */ + struct reloc * reloc; /* 120 8 */ /* --- cacheline 2 boundary (128 bytes) --- */ - struct reloc * reloc; /* 128 8 */ - struct alternative * alts; /* 136 8 */ - struct symbol * sym; /* 144 8 */ - struct stack_op * stack_ops; /* 152 8 */ - struct cfi_state * cfi; /* 160 8 */ + struct alternative * alts; /* 128 8 */ + struct symbol * sym; /* 136 8 */ + struct stack_op * stack_ops; /* 144 8 */ + struct cfi_state * cfi; /* 152 8 */ - /* size: 168, cachelines: 3, members: 29 */ - /* sum members: 162, holes: 1, sum holes: 4 */ - /* sum bitfield members: 9 bits, bit holes: 1, sum bit holes: 7 bits */ - /* last cacheline: 40 bytes */ + /* size: 160, cachelines: 3, members: 29 */ + /* sum members: 158 */ + /* sum bitfield members: 13 bits, bit holes: 1, sum bit holes: 3 bits */ + /* last cacheline: 32 bytes */ }; pre: 5:48.86 real, 220.30 user, 128.34 sys, 24834672 mem post: 5:48.89 real, 220.96 user, 127.55 sys, 24834672 mem Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Ingo Molnar <mingo@kernel.org> Acked-by: Josh Poimboeuf <jpoimboe@kernel.org> Tested-by: Nathan Chancellor <nathan@kernel.org> # build only Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run Link: https://lore.kernel.org/r/20230208172245.501847188@infradead.org
|
#
d5406654 |
|
08-Feb-2023 |
Peter Zijlstra <peterz@infradead.org> |
objtool: Make instruction::alts a single-linked list struct instruction { struct list_head list; /* 0 16 */ struct hlist_node hash; /* 16 16 */ struct list_head call_node; /* 32 16 */ struct section * sec; /* 48 8 */ long unsigned int offset; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ unsigned int len; /* 64 4 */ enum insn_type type; /* 68 4 */ long unsigned int immediate; /* 72 8 */ u16 dead_end:1; /* 80: 0 2 */ u16 ignore:1; /* 80: 1 2 */ u16 ignore_alts:1; /* 80: 2 2 */ u16 hint:1; /* 80: 3 2 */ u16 save:1; /* 80: 4 2 */ u16 restore:1; /* 80: 5 2 */ u16 retpoline_safe:1; /* 80: 6 2 */ u16 noendbr:1; /* 80: 7 2 */ u16 entry:1; /* 80: 8 2 */ /* XXX 7 bits hole, try to pack */ s8 instr; /* 82 1 */ u8 visited; /* 83 1 */ /* XXX 4 bytes hole, try to pack */ struct alt_group * alt_group; /* 88 8 */ struct symbol * call_dest; /* 96 8 */ struct instruction * jump_dest; /* 104 8 */ struct instruction * first_jump_src; /* 112 8 */ struct reloc * jump_table; /* 120 8 */ /* --- cacheline 2 boundary (128 bytes) --- */ struct reloc * reloc; /* 128 8 */ - struct list_head alts; /* 136 16 */ - struct symbol * sym; /* 152 8 */ - struct stack_op * stack_ops; /* 160 8 */ - struct cfi_state * cfi; /* 168 8 */ + struct alternative * alts; /* 136 8 */ + struct symbol * sym; /* 144 8 */ + struct stack_op * stack_ops; /* 152 8 */ + struct cfi_state * cfi; /* 160 8 */ - /* size: 176, cachelines: 3, members: 29 */ - /* sum members: 170, holes: 1, sum holes: 4 */ + /* size: 168, cachelines: 3, members: 29 */ + /* sum members: 162, holes: 1, sum holes: 4 */ /* sum bitfield members: 9 bits, bit holes: 1, sum bit holes: 7 bits */ - /* last cacheline: 48 bytes */ + /* last cacheline: 40 bytes */ }; pre: 5:58.50 real, 229.64 user, 128.65 sys, 26221520 mem post: 5:48.86 real, 220.30 user, 128.34 sys, 24834672 mem Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Ingo Molnar <mingo@kernel.org> Acked-by: Josh Poimboeuf <jpoimboe@kernel.org> Tested-by: Nathan Chancellor <nathan@kernel.org> # build only Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run Link: https://lore.kernel.org/r/20230208172245.430556498@infradead.org
|
#
3ee88df1 |
|
08-Feb-2023 |
Peter Zijlstra <peterz@infradead.org> |
objtool: Make instruction::stack_ops a single-linked list struct instruction { struct list_head list; /* 0 16 */ struct hlist_node hash; /* 16 16 */ struct list_head call_node; /* 32 16 */ struct section * sec; /* 48 8 */ long unsigned int offset; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ unsigned int len; /* 64 4 */ enum insn_type type; /* 68 4 */ long unsigned int immediate; /* 72 8 */ u16 dead_end:1; /* 80: 0 2 */ u16 ignore:1; /* 80: 1 2 */ u16 ignore_alts:1; /* 80: 2 2 */ u16 hint:1; /* 80: 3 2 */ u16 save:1; /* 80: 4 2 */ u16 restore:1; /* 80: 5 2 */ u16 retpoline_safe:1; /* 80: 6 2 */ u16 noendbr:1; /* 80: 7 2 */ u16 entry:1; /* 80: 8 2 */ /* XXX 7 bits hole, try to pack */ s8 instr; /* 82 1 */ u8 visited; /* 83 1 */ /* XXX 4 bytes hole, try to pack */ struct alt_group * alt_group; /* 88 8 */ struct symbol * call_dest; /* 96 8 */ struct instruction * jump_dest; /* 104 8 */ struct instruction * first_jump_src; /* 112 8 */ struct reloc * jump_table; /* 120 8 */ /* --- cacheline 2 boundary (128 bytes) --- */ struct reloc * reloc; /* 128 8 */ struct list_head alts; /* 136 16 */ struct symbol * sym; /* 152 8 */ - struct list_head stack_ops; /* 160 16 */ - struct cfi_state * cfi; /* 176 8 */ + struct stack_op * stack_ops; /* 160 8 */ + struct cfi_state * cfi; /* 168 8 */ - /* size: 184, cachelines: 3, members: 29 */ - /* sum members: 178, holes: 1, sum holes: 4 */ + /* size: 176, cachelines: 3, members: 29 */ + /* sum members: 170, holes: 1, sum holes: 4 */ /* sum bitfield members: 9 bits, bit holes: 1, sum bit holes: 7 bits */ - /* last cacheline: 56 bytes */ + /* last cacheline: 48 bytes */ }; pre: 5:58.22 real, 226.69 user, 131.22 sys, 26221520 mem post: 5:58.50 real, 229.64 user, 128.65 sys, 26221520 mem Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Ingo Molnar <mingo@kernel.org> Acked-by: Josh Poimboeuf <jpoimboe@kernel.org> Tested-by: Nathan Chancellor <nathan@kernel.org> # build only Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run Link: https://lore.kernel.org/r/20230208172245.362196959@infradead.org
|
#
dbcdbdfd |
|
22-Sep-2022 |
Peter Zijlstra <peterz@infradead.org> |
objtool: Rework instruction -> symbol mapping Currently insn->func contains a instruction -> symbol link for STT_FUNC symbols. A NULL value is assumed to mean STT_NOTYPE. However, there are also instructions not covered by any symbol at all. This can happen due to __weak symbols for example. Since the current scheme cannot differentiate between no symbol and STT_NOTYPE symbol, change things around. Make insn->sym point to any symbol type such that !insn->sym means no symbol and add a helper insn_func() that check the sym->type to retain the old functionality. This then prepares the way to add code that depends on the distinction between STT_NOTYPE and no symbol at all. Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
|
#
8faea26e |
|
23-Jun-2022 |
Josh Poimboeuf <jpoimboe@kernel.org> |
objtool: Re-add UNWIND_HINT_{SAVE_RESTORE} Commit c536ed2fffd5 ("objtool: Remove SAVE/RESTORE hints") removed the save/restore unwind hints because they were no longer needed. Now they're going to be needed again so re-add them. Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Borislav Petkov <bp@suse.de>
|
#
a09a6e23 |
|
14-Jun-2022 |
Peter Zijlstra <peterz@infradead.org> |
objtool: Add entry UNRET validation Since entry asm is tricky, add a validation pass that ensures the retbleed mitigation has been done before the first actual RET instruction. Entry points are those that either have UNWIND_HINT_ENTRY, which acts as UNWIND_HINT_EMPTY but marks the instruction as an entry point, or those that have UWIND_HINT_IRET_REGS at +0. This is basically a variant of validate_branch() that is intra-function and it will simply follow all branches from marked entry points and ensures that all paths lead to ANNOTATE_UNRET_END. If a path hits RET or an indirection the path is a fail and will be reported. There are 3 ANNOTATE_UNRET_END instances: - UNTRAIN_RET itself - exception from-kernel; this path doesn't need UNTRAIN_RET - all early exceptions; these also don't need UNTRAIN_RET Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Borislav Petkov <bp@suse.de> Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> Signed-off-by: Borislav Petkov <bp@suse.de>
|
#
96db4a98 |
|
08-Mar-2022 |
Peter Zijlstra <peterz@infradead.org> |
objtool: Read the NOENDBR annotation Read the new NOENDBR annotation. While there, attempt to not bloat struct instruction. Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: Josh Poimboeuf <jpoimboe@redhat.com> Link: https://lore.kernel.org/r/20220308154319.586815435@infradead.org
|
#
c509331b |
|
26-Oct-2021 |
Peter Zijlstra <peterz@infradead.org> |
objtool: Shrink struct instruction Any one instruction can only ever call a single function, therefore insn->mcount_loc_node is superfluous and can use insn->call_node. This shrinks struct instruction, which is by far the most numerous structure objtool creates. Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Reviewed-by: Borislav Petkov <bp@suse.de> Acked-by: Josh Poimboeuf <jpoimboe@redhat.com> Tested-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/r/20211026120309.785456706@infradead.org
|
#
8b946cc3 |
|
24-Jun-2021 |
Peter Zijlstra <peterz@infradead.org> |
objtool: Introduce CFI hash Andi reported that objtool on vmlinux.o consumes more memory than his system has, leading to horrific performance. This is in part because we keep a struct instruction for every instruction in the file in-memory. Shrink struct instruction by removing the CFI state (which includes full register state) from it and demand allocating it. Given most instructions don't actually change CFI state, there's lots of repetition there, so add a hash table to find previous CFI instances. Reduces memory consumption (and runtime) for processing an x86_64-allyesconfig: pre: 4:40.84 real, 143.99 user, 44.18 sys, 30624988 mem post: 2:14.61 real, 108.58 user, 25.04 sys, 16396184 mem Suggested-by: Andi Kleen <andi@firstfloor.org> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lore.kernel.org/r/20210624095147.756759107@infradead.org
|
#
7bd2a600 |
|
26-Mar-2021 |
Peter Zijlstra <peterz@infradead.org> |
objtool: Cache instruction relocs Track the reloc of instructions in the new instruction->reloc field to avoid having to look them up again later. ( Technically x86 instructions can have two relocations, but not jumps and calls, for which we're using this. ) Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Borislav Petkov <bp@suse.de> Signed-off-by: Ingo Molnar <mingo@kernel.org> Reviewed-by: Miroslav Benes <mbenes@suse.cz> Link: https://lkml.kernel.org/r/20210326151300.195441549@infradead.org
|
#
43d5430a |
|
26-Mar-2021 |
Peter Zijlstra <peterz@infradead.org> |
objtool: Keep track of retpoline call sites Provide infrastructure for architectures to rewrite/augment compiler generated retpoline calls. Similar to what we do for static_call()s, keep track of the instructions that are retpoline calls. Use the same list_head, since a retpoline call cannot also be a static_call. Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Borislav Petkov <bp@suse.de> Signed-off-by: Ingo Molnar <mingo@kernel.org> Reviewed-by: Miroslav Benes <mbenes@suse.cz> Link: https://lkml.kernel.org/r/20210326151300.130805730@infradead.org
|
#
b735bd3e |
|
21-Jan-2021 |
Josh Poimboeuf <jpoimboe@redhat.com> |
objtool: Combine UNWIND_HINT_RET_OFFSET and UNWIND_HINT_FUNC The ORC metadata generated for UNWIND_HINT_FUNC isn't actually very func-like. With certain usages it can cause stack state mismatches because it doesn't set the return address (CFI_RA). Also, users of UNWIND_HINT_RET_OFFSET no longer need to set a custom return stack offset. Instead they just need to specify a func-like situation, so the current ret_offset code is hacky for no good reason. Solve both problems by simplifying the RET_OFFSET handling and converting it into a more useful UNWIND_HINT_FUNC. If we end up needing the old 'ret_offset' functionality again in the future, we should be able to support it pretty easily with the addition of a custom 'sp_offset' in UNWIND_HINT_FUNC. Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> Link: https://lore.kernel.org/r/db9d1f5d79dddfbb3725ef6d8ec3477ad199948d.1611263462.git.jpoimboe@redhat.com
|
#
1f9a1b74 |
|
21-Jan-2021 |
Josh Poimboeuf <jpoimboe@redhat.com> |
objtool: Fix retpoline detection in asm code The JMP_NOSPEC macro branches to __x86_retpoline_*() rather than the __x86_indirect_thunk_*() wrappers used by C code. Detect jumps to __x86_retpoline_*() as retpoline dynamic jumps. Presumably this doesn't trigger a user-visible bug. I only found it when testing vmlinux.o validation. Fixes: 39b735332cb8 ("objtool: Detect jumps to retpoline thunks") Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> Link: https://lore.kernel.org/r/31f5833e2e4f01e3d755889ac77e3661e906c09f.1611263461.git.jpoimboe@redhat.com
|
#
c9c324dc |
|
18-Dec-2020 |
Josh Poimboeuf <jpoimboe@redhat.com> |
objtool: Support stack layout changes in alternatives The ORC unwinder showed a warning [1] which revealed the stack layout didn't match what was expected. The problem was that paravirt patching had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed the stack layout between the PUSHF and the POP, so unwinding from an interrupt which occurred between those two instructions would fail. Part of the agreed upon solution was to rework the custom paravirt patching code to use alternatives instead, since objtool already knows how to read alternatives (and converging runtime patching infrastructure is always a good thing anyway). But the main problem still remains, which is that runtime patching can change the stack layout. Making stack layout changes in alternatives was disallowed with commit 7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt is going to be doing it, it needs to be supported. One way to do so would be to modify the ORC table when the code gets patched. But ORC is simple -- a good thing! -- and it's best to leave it alone. Instead, support stack layout changes by "flattening" all possible stack states (CFI) from parallel alternative code streams into a single set of linear states. The only necessary limitation is that CFI conflicts are disallowed at all possible instruction boundaries. For example, this scenario is allowed: Alt1 Alt2 Alt3 0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF 0x01 POP %RAX 0x02 NOP ... 0x05 NOP ... 0x07 <insn> The unwind information for offset-0x00 is identical for all 3 alternatives. Similarly offset-0x05 and higher also are identical (and the same as 0x00). However offset-0x01 has deviating CFI, but that is only relevant for Alt3, neither of the other alternative instruction streams will ever hit that offset. This scenario is NOT allowed: Alt1 Alt2 0x00 CALL *pv_ops.save_fl PUSHF 0x01 NOP6 ... 0x07 NOP POP %RAX The problem here is that offset-0x7, which is an instruction boundary in both possible instruction patch streams, has two conflicting stack layouts. [ The above examples were stolen from Peter Zijlstra. ] The new flattened CFI array is used both for the detection of conflicts (like the second example above) and the generation of linear ORC entries. BTW, another benefit of these changes is that, thanks to some related cleanups (new fake nops and alt_group struct) objtool can finally be rid of fake jumps, which were a constant source of headaches. [1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble Cc: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com> Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
|
#
b23cc71c |
|
18-Dec-2020 |
Josh Poimboeuf <jpoimboe@redhat.com> |
objtool: Add 'alt_group' struct Create a new struct associated with each group of alternatives instructions. This will help with the removal of fake jumps, and more importantly with adding support for stack layout changes in alternatives. Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
|
#
ab4e0744 |
|
17-Dec-2020 |
Josh Poimboeuf <jpoimboe@redhat.com> |
objtool: Refactor ORC section generation Decouple ORC entries from instructions. This simplifies the control/data flow, and is going to make it easier to support alternative instructions which change the stack layout. Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
|
#
7786032e |
|
12-Nov-2020 |
Vasily Gorbik <gor@linux.ibm.com> |
objtool: Rework header include paths Currently objtool headers are being included either by their base name or included via ../ from a parent directory. In case of a base name usage: #include "warn.h" #include "arch_elf.h" it does not make it apparent from which directory the file comes from. To make it slightly better, and actually to avoid name clashes some arch specific files have "arch_" suffix. And files from an arch folder have to revert to including via ../ e.g: #include "../../elf.h" With additional architectures support and the code base growth there is a need for clearer headers naming scheme for multiple reasons: 1. to make it instantly obvious where these files come from (objtool itself / objtool arch|generic folders / some other external files), 2. to avoid name clashes of objtool arch specific headers, potential obtool arch generic headers and the system header files (there is /usr/include/elf.h already), 3. to avoid ../ includes and improve code readability. 4. to give a warm fuzzy feeling to developers who are mostly kernel developers and are accustomed to linux kernel headers arranging scheme. Doesn't this make it instantly obvious where are these files come from? #include <objtool/warn.h> #include <arch/elf.h> And doesn't it look nicer to avoid ugly ../ includes? Which also guarantees this is elf.h from the objtool and not /usr/include/elf.h. #include <objtool/elf.h> This patch defines and implements new objtool headers arranging scheme. Which is: - all generic headers go to include/objtool (similar to include/linux) - all arch headers go to arch/$(SRCARCH)/include/arch (to get arch prefix). This is similar to linux arch specific "asm/*" headers but we are not abusing "asm" name and calling it what it is. This also helps to prevent name clashes (arch is not used in system headers or kernel exports). To bring objtool to this state the following things are done: 1. current top level tools/objtool/ headers are moved into include/objtool/ subdirectory, 2. arch specific headers, currently only arch/x86/include/ are moved into arch/x86/include/arch/ and were stripped of "arch_" suffix, 3. new -I$(srctree)/tools/objtool/include include path to make includes like <objtool/warn.h> possible, 4. rewriting file includes, 5. make git not to ignore include/objtool/ subdirectory. Signed-off-by: Vasily Gorbik <gor@linux.ibm.com> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: Masami Hiramatsu <mhiramat@kernel.org> Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
|