The Processor Formally Enters Protected Mode (Analysis of the protected_mode_jump Function)

After the IDT, the GDT, and a few other structures have been set up, the kernel calls protected_mode_jump to formally switch the processor into protected mode.

This code is implemented in /source/arch/x86/boot/pmjump.S#L24.
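Before looking at the assembly itself, it helps to see how the real-mode code ends up calling it. The tail of go_to_protected_mode() in /source/arch/x86/boot/pm.c looks roughly like the sketch below (trimmed and paraphrased, so details may differ between kernel versions): the 32-bit entry point becomes the first argument (passed in %eax under -mregparm=3) and the physical address of boot_params the second (passed in %edx).

/* Paraphrased from arch/x86/boot/pm.c; not a verbatim copy. */
void go_to_protected_mode(void)
{
    realmode_switch_hook();      /* leave real mode, interrupts off      */
    /* ... enable A20, reset the coprocessor, mask the PICs ... */
    setup_idt();
    setup_gdt();
    /* %eax = 32-bit entry point, %edx = physical address of boot_params */
    protected_mode_jump(boot_params.hdr.code32_start,
                        (u32)&boot_params + (ds() << 4));
}

With that calling convention in mind, here is pmjump.S: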

/*
 * The actual transition into protected mode
 */

#include <asm/boot.h>
#include <asm/processor-flags.h>
#include <asm/segment.h>
#include <linux/linkage.h>

    .text
    .code16

/*
 * void protected_mode_jump(u32 entrypoint, u32 bootparams);
 */
SYM_FUNC_START_NOALIGN(protected_mode_jump)
    movl    %edx, %esi      # Pointer to boot_params table

    xorl    %ebx, %ebx
    movw    %cs, %bx
    shll    $4, %ebx
    addl    %ebx, 2f
    jmp 1f          # Short jump to serialize on 386/486
1:

    movw    $__BOOT_DS, %cx
    movw    $__BOOT_TSS, %di

    movl    %cr0, %edx
    orb $X86_CR0_PE, %dl    # Protected mode
    movl    %edx, %cr0

    # Transition to 32-bit mode
    .byte   0x66, 0xea      # ljmpl opcode
2:  .long   .Lin_pm32       # offset
    .word   __BOOT_CS       # segment
SYM_FUNC_END(protected_mode_jump)
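
A detail worth pausing on: the instruction addl %ebx, 2f patches the 32-bit offset stored at label 2. That offset was assembled as the link-time offset of .Lin_pm32, but once the ljmpl executes, __BOOT_CS has base 0, so the operand has to be a linear address; adding the real-mode segment base (%cs << 4, held in %ebx) fixes it up in place. A minimal C sketch of the arithmetic, with made-up numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Hypothetical values: the real ones depend on where the boot loader
     * placed the setup code and on the layout of pmjump.S. */
    uint32_t cs           = 0x1020;   /* real-mode %cs of the setup code  */
    uint32_t segment_base = cs << 4;  /* 0x10200: its linear base address */
    uint32_t link_offset  = 0x7c;     /* offset of .Lin_pm32 at link time */

    /* This is what "addl %ebx, 2f" stores back into the ljmpl operand:
     * a linear address that works with the base-0 __BOOT_CS segment. */
    uint32_t patched = link_offset + segment_base;
    printf("ljmpl $__BOOT_CS, $0x%x\n", patched);
    return 0;
}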

The .Lin_pm32 code sets up the data segments for flat 32-bit mode and then jumps to the 32-bit entry point:

    .code32
    .section ".text32","ax"
SYM_FUNC_START_LOCAL_NOALIGN(.Lin_pm32)
    # Set up data segments for flat 32-bit mode
    movl    %ecx, %ds
    movl    %ecx, %es
    movl    %ecx, %fs
    movl    %ecx, %gs
    movl    %ecx, %ss
    # The 32-bit code sets up its own stack, but this way we do have
    # a valid stack if some debugging hack wants to use it.
    addl    %ebx, %esp

    # Set up TR to make Intel VT happy
    ltr %di

    # Clear registers to allow for future extensions to the
    # 32-bit boot protocol
    xorl    %ecx, %ecx
    xorl    %edx, %edx
    xorl    %ebx, %ebx
    xorl    %ebp, %ebp
    xorl    %edi, %edi

    # Set up LDTR to make Intel VT happy
    lldt    %cx

    jmpl    *%eax           # Jump to the 32-bit entrypoint
SYM_FUNC_END(.Lin_pm32)

The 32-bit entry point is located in the assembly file /source/arch/x86/boot/compressed/head_64.S, despite the _64 suffix in its name. There are two similar files in the /source/arch/x86/boot/compressed directory:

  • /source/arch/x86/boot/compressed/head_32.S;
  • /source/arch/x86/boot/compressed/head_64.S.

The 32-bit entry point, however, lives in the second file; the first one is not compiled at all under an x86_64 configuration.

We can confirm this in /source/arch/x86/boot/compressed/Makefile#L76:

vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/kernel_info.o $(obj)/head_$(BITS).o \
    $(obj)/misc.o $(obj)/string.o $(obj)/cmdline.o $(obj)/error.o \
    $(obj)/piggy.o $(obj)/cpuflags.o

Which head_* file is built depends on the value of the $(BITS) variable, which in turn is determined by the target architecture. The relevant code is in /source/arch/x86/Makefile#L64:

ifeq ($(CONFIG_X86_32),y)
        BITS := 32
        ......
else
        BITS := 64
        ......

The Processor Enters Long Mode (Analysis of the startup_32 Function)

Control now passes from protected_mode_jump to startup_32. This function prepares the processor for long mode and then jumps straight into it:

    .code32
    .text

#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/boot.h>
#include <asm/msr.h>
#include <asm/processor-flags.h>
#include <asm/asm-offsets.h>
#include <asm/bootparam.h>
#include "pgtable.h"

/*
 * Locally defined symbols should be marked hidden:
 */
    .hidden _bss
    .hidden _ebss
    .hidden _got
    .hidden _egot

    __HEAD
    .code32
SYM_FUNC_START(startup_32)
    /*
     * 32bit entry is 0 and it is ABI so immutable!
     * If we come here directly from a bootloader,
     * kernel(text+data+bss+brk) ramdisk, zero_page, command line
     * all need to be under the 4G limit.
     */
    cld
    /*
     * Test KEEP_SEGMENTS flag to see if the bootloader is asking
     * us to not reload segments
     */
    testb $KEEP_SEGMENTS, BP_loadflags(%esi)
    jnz 1f

    cli
    movl    $(__BOOT_DS), %eax
    movl    %eax, %ds
    movl    %eax, %es
    movl    %eax, %ss
1:

/*
 * Calculate the delta between where we were compiled to run
 * at and where we were actually loaded at.  This can only be done
 * with a short local call on x86.  Nothing  else will tell us what
 * address we are running at.  The reserved chunk of the real-mode
 * data at 0x1e4 (defined as a scratch field) are used as the stack
 * for this calculation. Only 4 bytes are needed.
 */
    leal    (BP_scratch+4)(%esi), %esp
    call    1f
1:  popl    %ebp
    subl    $1b, %ebp
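    /*
     * Worked example (hypothetical numbers): if this image was linked to
     * run at 0 but the boot loader loaded it at 0x100000, the address
     * popped into %ebp is 0x100000 plus the link-time address of 1b, so
     * after the subtraction %ebp holds the load delta 0x100000.
     */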

/* setup a stack and make sure cpu supports long mode. */
    movl    $boot_stack_end, %eax
    addl    %ebp, %eax
    movl    %eax, %esp

    call    verify_cpu
    testl   %eax, %eax
    jnz .Lno_longmode

/*
 * Compute the delta between where we were compiled to run at
 * and where the code will actually run at.
 *
 * %ebp contains the address we are loaded at by the boot loader and %ebx
 * contains the address where we should move the kernel image temporarily
 * for safe in-place decompression.
 */

#ifdef CONFIG_RELOCATABLE
    movl    %ebp, %ebx
    movl    BP_kernel_alignment(%esi), %eax
    decl    %eax
    addl    %eax, %ebx
    notl    %eax
    andl    %eax, %ebx
    cmpl    $LOAD_PHYSICAL_ADDR, %ebx
    jge 1f
#endif
    movl    $LOAD_PHYSICAL_ADDR, %ebx
1:
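    /*
     * The decl/addl/notl/andl sequence above is the usual round-up idiom:
     * %ebx = (%ebp + kernel_alignment - 1) & ~(kernel_alignment - 1),
     * i.e. the load address rounded up to the requested alignment, with
     * LOAD_PHYSICAL_ADDR as the lower bound.
     */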

    /* Target address to relocate to for decompression */
    movl    BP_init_size(%esi), %eax
    subl    $_end, %eax
    addl    %eax, %ebx

/*
 * Prepare for entering 64 bit mode
 */

    /* Load new GDT with the 64bit segments using 32bit descriptor */
    addl    %ebp, gdt+2(%ebp)
    lgdt    gdt(%ebp)

    /* Enable PAE mode */
    movl    %cr4, %eax
    orl $X86_CR4_PAE, %eax
    movl    %eax, %cr4

 /*
  * Build early 4G boot pagetable
  */
    /*
     * If SEV is active then set the encryption mask in the page tables.
     * This will insure that when the kernel is copied and decompressed
     * it will be done so encrypted.
     */
    call    get_sev_encryption_bit
    xorl    %edx, %edx
    testl   %eax, %eax
    jz  1f
    subl    $32, %eax   /* Encryption bit is always above bit 31 */
    bts %eax, %edx  /* Set encryption mask for page tables */
1:
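    /*
     * Example: with an AMD SEV C-bit at position 47, %eax becomes
     * 47 - 32 = 15 and %edx becomes 1 << 15.  %edx is later added to the
     * high dword of every page-table entry below, which sets bit 47 of
     * the 64-bit entry and marks the mapping as encrypted.
     */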

    /* Initialize Page tables to 0 */
    leal    pgtable(%ebx), %edi
    xorl    %eax, %eax
    movl    $(BOOT_INIT_PGT_SIZE/4), %ecx
    rep stosl

    /* Build Level 4 */
    leal    pgtable + 0(%ebx), %edi
    leal    0x1007 (%edi), %eax
    movl    %eax, 0(%edi)
    addl    %edx, 4(%edi)

    /* Build Level 3 */
    leal    pgtable + 0x1000(%ebx), %edi
    leal    0x1007(%edi), %eax
    movl    $4, %ecx
1:  movl    %eax, 0x00(%edi)
    addl    %edx, 0x04(%edi)
    addl    $0x00001000, %eax
    addl    $8, %edi
    decl    %ecx
    jnz 1b

    /* Build Level 2 */
    leal    pgtable + 0x2000(%ebx), %edi
    movl    $0x00000183, %eax
    movl    $2048, %ecx
1:  movl    %eax, 0(%edi)
    addl    %edx, 4(%edi)
    addl    $0x00200000, %eax
    addl    $8, %edi
    decl    %ecx
    jnz 1b
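    /*
     * The tables built above identity-map the first 4 GiB with 2 MiB
     * pages: one level-4 entry, four level-3 entries (1 GiB each) and
     * 4 * 512 = 2048 level-2 entries.  The value 0x1007 is "next table
     * address + PRESENT + RW + USER", and 0x183 is "PRESENT + RW +
     * PSE (2 MiB page) + GLOBAL".
     */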

    /* Enable the boot page tables */
    leal    pgtable(%ebx), %eax
    movl    %eax, %cr3

    /* Enable Long mode in EFER (Extended Feature Enable Register) */
    movl    $MSR_EFER, %ecx
    rdmsr
    btsl    $_EFER_LME, %eax
    wrmsr

    /* After gdt is loaded */
    xorl    %eax, %eax
    lldt    %ax
    movl    $__BOOT_TSS, %eax
    ltr %ax

    /*
     * Setup for the jump to 64bit mode
     *
     * When the jump is performed we will be in long mode but
     * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
     * (and in turn EFER.LMA = 1).  To jump into 64bit mode we use
     * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
     * We place all of the values on our mini stack so lret can
     * be used to perform that far jump.
     */
    pushl   $__KERNEL_CS
    leal    startup_64(%ebp), %eax
#ifdef CONFIG_EFI_MIXED
    movl    efi32_boot_args(%ebp), %edi
    cmp $0, %edi
    jz  1f
    leal    efi64_stub_entry(%ebp), %eax
    movl    %esi, %edx
    movl    efi32_boot_args+4(%ebp), %esi
1:
#endif
    pushl   %eax

    /* Enter paged protected Mode, activating Long Mode */
    movl    $(X86_CR0_PG | X86_CR0_PE), %eax /* Enable Paging and Protected mode */
    movl    %eax, %cr0

    /* Jump from 32bit compatibility mode into 64bit mode. */
    lret
SYM_FUNC_END(startup_32)
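
startup_32 and, as we will see shortly, startup_64 both perform the same computation to pick the buffer used for safe in-place decompression: round the load address up to kernel_alignment, enforce LOAD_PHYSICAL_ADDR as a floor, and then add init_size - _end bytes of headroom. A minimal C sketch of that arithmetic, with hypothetical numbers standing in for the values taken from boot_params and the linker script:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical constants; the real ones come from boot_params
 * (kernel_alignment, init_size) and from the linker script (_end). */
#define KERNEL_ALIGNMENT   0x200000u    /* 2 MiB  */
#define LOAD_PHYSICAL_ADDR 0x1000000u   /* 16 MiB */

static uint32_t decompress_target(uint32_t load_addr, uint32_t init_size,
                                  uint32_t image_end)
{
    /* Round the load address up to the kernel alignment... */
    uint32_t base = (load_addr + KERNEL_ALIGNMENT - 1) & ~(KERNEL_ALIGNMENT - 1);

    /* ...but never run below LOAD_PHYSICAL_ADDR. */
    if (base < LOAD_PHYSICAL_ADDR)
        base = LOAD_PHYSICAL_ADDR;

    /* Leave init_size - _end bytes of headroom so that decompressing in
     * place cannot overwrite compressed data that has not been read yet. */
    return base + (init_size - image_end);
}

int main(void)
{
    printf("relocate to 0x%x\n",
           decompress_target(0x100000, 0x1000000, 0x500000));
    return 0;
}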

After the processor enters long mode, execution continues in the startup_64 function:

    .code64
    .org 0x200
SYM_CODE_START(startup_64)
    /*
     * 64bit entry is 0x200 and it is ABI so immutable!
     * We come here either from startup_32 or directly from a
     * 64bit bootloader.
     * If we come here from a bootloader, kernel(text+data+bss+brk),
     * ramdisk, zero_page, command line could be above 4G.
     * We depend on an identity mapped page table being provided
     * that maps our entire kernel(text+data+bss+brk), zero page
     * and command line.
     */

    /* Setup data segments. */
    xorl    %eax, %eax
    movl    %eax, %ds
    movl    %eax, %es
    movl    %eax, %ss
    movl    %eax, %fs
    movl    %eax, %gs

    /*
     * Compute the decompressed kernel start address.  It is where
     * we were loaded at aligned to a 2M boundary. %rbp contains the
     * decompressed kernel start address.
     *
     * If it is a relocatable kernel then decompress and run the kernel
     * from load address aligned to 2MB addr, otherwise decompress and
     * run the kernel from LOAD_PHYSICAL_ADDR
     *
     * We cannot rely on the calculation done in 32-bit mode, since we
     * may have been invoked via the 64-bit entry point.
     */

    /* Start with the delta to where the kernel will run at. */
#ifdef CONFIG_RELOCATABLE
    leaq    startup_32(%rip) /* - $startup_32 */, %rbp
    movl    BP_kernel_alignment(%rsi), %eax
    decl    %eax
    addq    %rax, %rbp
    notq    %rax
    andq    %rax, %rbp
    cmpq    $LOAD_PHYSICAL_ADDR, %rbp
    jge 1f
#endif
    movq    $LOAD_PHYSICAL_ADDR, %rbp
1:

    /* Target address to relocate to for decompression */
    movl    BP_init_size(%rsi), %ebx
    subl    $_end, %ebx
    addq    %rbp, %rbx

    /* Set up the stack */
    leaq    boot_stack_end(%rbx), %rsp

    /*
     * paging_prepare() and cleanup_trampoline() below can have GOT
     * references. Adjust the table with address we are running at.
     *
     * Zero RAX for adjust_got: the GOT was not adjusted before;
     * there's no adjustment to undo.
     */
    xorq    %rax, %rax

    /*
     * Calculate the address the binary is loaded at and use it as
     * a GOT adjustment.
     */
    call    1f
1:  popq    %rdi
    subq    $1b, %rdi

    call    .Ladjust_got
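    /*
     * .Ladjust_got is not shown in this excerpt; it walks the entries
     * between _got and _egot, subtracting the previous adjustment (%rax,
     * zero on this first call) and adding the new one (%rdi) to each of
     * them, so GOT references resolve at the address we actually run from.
     */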

    /*
     * At this point we are in long mode with 4-level paging enabled,
     * but we might want to enable 5-level paging or vice versa.
     *
     * The problem is that we cannot do it directly. Setting or clearing
     * CR4.LA57 in long mode would trigger #GP. So we need to switch off
     * long mode and paging first.
     *
     * We also need a trampoline in lower memory to switch over from
     * 4- to 5-level paging for cases when the bootloader puts the kernel
     * above 4G, but didn't enable 5-level paging for us.
     *
     * The same trampoline can be used to switch from 5- to 4-level paging
     * mode, like when starting 4-level paging kernel via kexec() when
     * original kernel worked in 5-level paging mode.
     *
     * For the trampoline, we need the top page table to reside in lower
     * memory as we don't have a way to load 64-bit values into CR3 in
     * 32-bit mode.
     *
     * We go though the trampoline even if we don't have to: if we're
     * already in a desired paging mode. This way the trampoline code gets
     * tested on every boot.
     */

    /* Make sure we have GDT with 32-bit code segment */
    leaq    gdt(%rip), %rax
    movq    %rax, gdt64+2(%rip)
    lgdt    gdt64(%rip)

    /*
     * paging_prepare() sets up the trampoline and checks if we need to
     * enable 5-level paging.
     *
     * paging_prepare() returns a two-quadword structure which lands
     * into RDX:RAX:
     *   - Address of the trampoline is returned in RAX.
     *   - Non zero RDX means trampoline needs to enable 5-level
     *     paging.
     *
     * RSI holds real mode data and needs to be preserved across
     * this function call.
     */
    pushq   %rsi
    movq    %rsi, %rdi      /* real mode address */
    call    paging_prepare
    popq    %rsi

    /* Save the trampoline address in RCX */
    movq    %rax, %rcx

    /*
     * Load the address of trampoline_return() into RDI.
     * It will be used by the trampoline to return to the main code.
     */
    leaq    trampoline_return(%rip), %rdi

    /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
    pushq   $__KERNEL32_CS
    leaq    TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
    pushq   %rax
    lretq
trampoline_return:
    /* Restore the stack, the 32-bit trampoline uses its own stack */
    leaq    boot_stack_end(%rbx), %rsp

    /*
     * cleanup_trampoline() would restore trampoline memory.
     *
     * RDI is address of the page table to use instead of page table
     * in trampoline memory (if required).
     *
     * RSI holds real mode data and needs to be preserved across
     * this function call.
     */
    pushq   %rsi
    leaq    top_pgtable(%rbx), %rdi
    call    cleanup_trampoline
    popq    %rsi

    /* Zero EFLAGS */
    pushq   $0
    popfq

    /*
     * Previously we've adjusted the GOT with address the binary was
     * loaded at. Now we need to re-adjust for relocation address.
     *
     * Calculate the address the binary is loaded at, so that we can
     * undo the previous GOT adjustment.
     */
    call    1f
1:  popq    %rax
    subq    $1b, %rax

    /* The new adjustment is the relocation address */
    movq    %rbx, %rdi
    call    .Ladjust_got

/*
 * Copy the compressed kernel to the end of our buffer
 * where decompression in place becomes safe.
 */
    pushq   %rsi
    leaq    (_bss-8)(%rip), %rsi
    leaq    (_bss-8)(%rbx), %rdi
    movq    $_bss /* - $startup_32 */, %rcx
    shrq    $3, %rcx
    std
    rep movsq
    cld
    popq    %rsi
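    /*
     * The copy above runs backwards (std + rep movsq, starting at _bss-8)
     * because source and destination can overlap, with the destination at
     * the higher address; copying from the top down never clobbers bytes
     * that have not been read yet.  %rcx = _bss / 8 is the size of the
     * image up to _bss, in quadwords.
     */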

/*
 * Jump to the relocated address.
 */
    leaq    .Lrelocated(%rbx), %rax
    jmp *%rax
SYM_CODE_END(startup_64)
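
For completeness, paging_prepare() and cleanup_trampoline() live in /source/arch/x86/boot/compressed/pgtable_64.c. The "two-quadword structure" mentioned in the comments above is simply a small struct returned by value; under the x86-64 calling convention its two members come back in RAX and RDX. Roughly (paraphrased; field names may differ between kernel versions):

/* Paraphrased from arch/x86/boot/compressed/pgtable_64.c. */
struct paging_config {
    unsigned long trampoline_start; /* returned in RAX: trampoline address  */
    unsigned long l5_required;      /* returned in RDX: non-zero means the  */
                                    /* trampoline must enable 5-level paging */
};

struct paging_config paging_prepare(void *rmode);
void cleanup_trampoline(void *pgtable);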