在这里将完成内核解压的准备工作。内核解压的主函数代码位于/source/arch/x86/boot/compressed/misc.c中的 decompress_kernel函数中,此处不再分析。

内核解压完成以后,程序进入secondary_startup_64函数(实现于/source/arch/x86/kernel/head_64.S)。在这个函数中,我们开始构建 identity-mapped pages,并在那之后检查NX位,配置 Extended Feature Enable Register,使用 lgdt指令更新早期的Global Descriptor Table。

/*
 * secondary_startup_64 - early 64-bit entry path.
 *
 * In order, the code below: verifies the CPU, forms the CR3 value from
 * init_top_pgt, programs CR4 and CR3, jumps to the absolute address of
 * the following label, sets EFER (SCE always, NX only when CPUID reports
 * it), loads CR0, switches to initial_stack, clears EFLAGS, loads
 * early_gdt_descr, clears the data segment registers, writes initial_gs
 * into MSR_GS_BASE, and finally far-returns to the address stored in
 * initial_code with %rdi = %rsi.
 */
SYM_CODE_START(secondary_startup_64)
    UNWIND_HINT_EMPTY
    /*
     * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
     * and someone has loaded a mapped page table.
     *
     * %rsi holds a physical pointer to real_mode_data.
     *
     * We come here either from startup_64 (using physical addresses)
     * or from trampoline.S (using virtual addresses).
     *
     * Using virtual addresses from trampoline.S removes the need
     * to have any identity mapped pages in the kernel page table
     * after the boot processor executes this code.
     */

    /* Sanitize CPU configuration */
    call verify_cpu

    /*
     * Retrieve the modifier (SME encryption mask if SME is active) to be
     * added to the initial pgdir entry that will be programmed into CR3.
     */
    /*
     * %rsi (the real_mode_data pointer) is saved around the call; the
     * modifier is then taken from %rax by the addq below.
     */
    pushq   %rsi
    call    __startup_secondary_64
    popq    %rsi

    /* Form the CR3 value being sure to include the CR3 modifier */
    addq    $(init_top_pgt - __START_KERNEL_map), %rax
1:

    /* Enable PAE mode, PGE and LA57 */
    movl    $(X86_CR4_PAE | X86_CR4_PGE), %ecx
#ifdef CONFIG_X86_5LEVEL
    /* LA57 is added only when bit 0 of __pgtable_l5_enabled is set */
    testl   $1, __pgtable_l5_enabled(%rip)
    jz  1f
    orl $X86_CR4_LA57, %ecx
1:
#endif
    movq    %rcx, %cr4

    /* Setup early boot stage 4-/5-level pagetables. */
    addq    phys_base(%rip), %rax
    movq    %rax, %cr3

    /* Ensure I am executing from virtual addresses */
    /* The label's absolute address is loaded and jumped to via %rax */
    movq    $1f, %rax
    ANNOTATE_RETPOLINE_SAFE
    jmp *%rax
1:
    UNWIND_HINT_EMPTY

    /* Check if nx is implemented */
    movl    $0x80000001, %eax
    cpuid
    /* Keep CPUID's %edx in %edi: rdmsr below overwrites %eax and %edx */
    movl    %edx,%edi

    /* Setup EFER (Extended Feature Enable Register) */
    movl    $MSR_EFER, %ecx
    rdmsr
    btsl    $_EFER_SCE, %eax    /* Enable System Call */
    btl $20,%edi        /* No Execute supported? */
    /* CF clear means bit 20 was not set: skip both NX updates */
    jnc     1f
    btsl    $_EFER_NX, %eax
    btsq    $_PAGE_BIT_NX,early_pmd_flags(%rip)
1:  wrmsr               /* Make changes effective */

    /* Setup cr0 */
    movl    $CR0_STATE, %eax
    /* Make changes effective */
    movq    %rax, %cr0

    /* Setup a boot time stack */
    movq initial_stack(%rip), %rsp

    /* zero EFLAGS after setting rsp */
    pushq $0
    popfq

    /*
     * We must switch to a new descriptor in kernel space for the GDT
     * because soon the kernel won't have access anymore to the userspace
     * addresses where we're currently running on. We have to do that here
     * because in 32bit we couldn't load a 64bit linear address.
     */
    lgdt    early_gdt_descr(%rip)

    /* set up data segments */
    xorl %eax,%eax
    movl %eax,%ds
    movl %eax,%ss
    movl %eax,%es

    /*
     * We don't really need to load %fs or %gs, but load them anyway
     * to kill any stale realmode selectors.  This allows execution
     * under VT hardware.
     */
    movl %eax,%fs
    movl %eax,%gs

    /* Set up %gs.
     *
     * The base of %gs always points to fixed_percpu_data. If the
     * stack protector canary is enabled, it is located at %gs:40.
     * Note that, on SMP, the boot cpu uses init data section until
     * the per cpu areas are set up.
     */
    /*
     * wrmsr takes the MSR index in %ecx and the 64-bit value in
     * %edx:%eax: the low dword of initial_gs goes in %eax and the
     * high dword (initial_gs+4) in %edx.
     */
    movl    $MSR_GS_BASE,%ecx
    movl    initial_gs(%rip),%eax
    movl    initial_gs+4(%rip),%edx
    wrmsr

    /* rsi is pointer to real mode structure with interesting info.
       pass it to C */
    movq    %rsi, %rdi

.Ljump_to_C_code:
    /*
     * Jump to run C code and to be on a real kernel address.
     * Since we are running on identity-mapped space we have to jump
     * to the full 64bit address, this is only possible as indirect
     * jump.  In addition we need to ensure %cs is set so we make this
     * a far return.
     *
     * Note: do not change to far jump indirect with 64bit offset.
     *
     * AMD does not support far jump indirect with 64bit offset.
     * AMD64 Architecture Programmer's Manual, Volume 3: states only
     *  JMP FAR mem16:16 FF /5 Far jump indirect,
     *      with the target specified by a far pointer in memory.
     *  JMP FAR mem16:32 FF /5 Far jump indirect,
     *      with the target specified by a far pointer in memory.
     *
     * Intel64 does support 64bit offset.
     * Software Developer Manual Vol 2: states:
     *  FF /5 JMP m16:16 Jump far, absolute indirect,
     *      address given in m16:16
     *  FF /5 JMP m16:32 Jump far, absolute indirect,
     *      address given in m16:32.
     *  REX.W + FF /5 JMP m16:64 Jump far, absolute indirect,
     *      address given in m16:64.
     */
    /*
     * Stack layout built for lretq, from the top down: target address
     * (the value of initial_code), then __KERNEL_CS, then .Lafter_lret.
     * lretq pops the target address first and the code selector second.
     */
    pushq   $.Lafter_lret   # put return address on stack for unwinder
    xorl    %ebp, %ebp  # clear frame pointer
    movq    initial_code(%rip), %rax
    pushq   $__KERNEL_CS    # set correct cs
    pushq   %rax        # target address in negative space
    lretq
.Lafter_lret:
SYM_CODE_END(secondary_startup_64)

这里我们着重关心设置 gs寄存器的代码:

/* Set up %gs.
*
* The base of %gs always points to fixed_percpu_data. If the
* stack protector canary is enabled, it is located at %gs:40.
* Note that, on SMP, the boot cpu uses init data section until
* the per cpu areas are set up.
*/
movl    $MSR_GS_BASE,%ecx
movl    initial_gs(%rip),%eax
movl    initial_gs+4(%rip),%edx
wrmsr

wrmsr指令将edx:eax寄存器中的64位数据(edx为高32位,eax为低32位)写入到由ecx寄存器指定的model specific register中。由代码可以看到,ecx中的值是$MSR_GS_BASE,该值在/source/arch/x86/include/uapi/asm/msr-index.h中定义:

#define MSR_GS_BASE 0xc0000101

由此可见,MSR_GS_BASE定义了 model specific register的编号。由于 cs, ds, es,和 ss在64-bit模式中不再使用,这些寄存器中的值将会被忽略,但我们可以通过 fs 和 gs寄存器来访问内存空间。model specific register提供了一种后门(back door)来访问这些段寄存器的隐藏部分,也让我们可以为 fs 和 gs段寄存器使用64-bit的基地址。MSR_GS_BASE对应的正是这一隐藏部分,它映射到 GS.base域。再看到 initial_gs的定义:

// In /source/arch/x86/kernel/head_64.S#L265
SYM_DATA(initial_gs,    .quad INIT_PER_CPU_VAR(fixed_percpu_data))

可以发现,initial_gs 指向 fixed_percpu_data,这段代码将 fixed_percpu_data传递给 INIT_PER_CPU_VAR宏,后者只是给输入参数添加了 init_per_cpu__前缀而已。在此得出了符号 init_per_cpu__fixed_percpu_data。再看到/source/arch/x86/kernel/vmlinux.lds.S中有如下定义:

/*
 * Per-cpu symbols which need to be offset from __per_cpu_load
 * for the boot processor.
 */
#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load
INIT_PER_CPU(gdt_page);
INIT_PER_CPU(fixed_percpu_data);
INIT_PER_CPU(irq_stack_backing_store);

这段代码告诉我们符号 init_per_cpu__fixed_percpu_data的地址将会是 fixed_percpu_data + __per_cpu_load。

fixed_percpu_data的定义出现在/source/arch/x86/include/asm/processor.h#L437中,其中的 DECLARE_INIT_PER_CPU宏展开后又调用了 init_per_cpu_var宏:

#ifdef CONFIG_X86_64
struct fixed_percpu_data {
    /*
     * GCC hardcodes the stack canary as %gs:40.  Since the
     * irq_stack is the object at %gs:0, we reserve the bottom
     * 48 bytes of the irq stack for the canary.
     */
    char        gs_base[40];
    // stack_canary is the stack-protector value for the interrupt stack,
    // used to verify that the stack has not been overwritten.
    // gs_base is a 40-byte pad in front of it: GCC requires the canary to
    // sit at a fixed offset from the %gs base, which is 40 on x86_64
    // (and 20 on 32-bit x86).
    unsigned long   stack_canary;
};

DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible;
DECLARE_INIT_PER_CPU(fixed_percpu_data);

// In /source/arch/x86/include/asm/percpu.h#L77
#define DECLARE_INIT_PER_CPU(var) \
       extern typeof(var) init_per_cpu_var(var)

// In /source/arch/x86/include/asm/percpu.h#L81
#ifdef CONFIG_X86_64_SMP
#define init_per_cpu_var(var)  init_per_cpu__##var
#else
#define init_per_cpu_var(var)  var
#endif

将所有的宏展开之后我们可以得到与之前相同的名称 init_per_cpu__fixed_percpu_data,但此时它不再只是一个符号,而成了一个变量。请注意表达式 typeof(var),在此时 var 是 fixed_percpu_data。

到此为止,我们弄清了表达式 ABSOLUTE(x) + __per_cpu_load中的第一项并且知道了它的地址。再看到第二个符号 __per_cpu_load,该符号声明在/source/include/asm-generic/sections.h#L42,它与一系列 per-cpu变量相关:

extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[];

这些符号代表了这一系列变量的数据区域的基地址,回到之前的代码中:

movl    $MSR_GS_BASE,%ecx
movl    initial_gs(%rip),%eax
movl    initial_gs+4(%rip),%edx
wrmsr

这里通过 MSR_GS_BASE指定了一个 model specific register,然后将 initial_gs中保存的64-bit地址放到了 edx:eax寄存器对中,然后执行 wrmsr指令,将 init_per_cpu__fixed_percpu_data的地址写入了 gs寄存器的基址(GS.base),而这个地址将是中断栈的栈底地址。

在此之后我们将进入 x86_64_start_kernel函数中,此函数定义在/source/arch/x86/kernel/head64.c。在这个函数中,将完成最后的准备工作,之后就要进入到与平台无关的通用内核代码,在这个过程中,会将中断服务程序入口地址填写到早期 Interrupt Descriptor Table中。

分类: CTF

0 条评论

发表评论

电子邮件地址不会被公开。 必填项已用*标注