文章首发于安全客 ,本文由安全客原创发布
转载,请参考转载声明,注明出处: https://www.anquanke.com/post/id/202988
安全客 – 有思想的安全新媒体
Kernel Pwn 学习之路(五)
0x01 前言
由于关于Kernel安全的文章实在过于繁杂,本文有部分内容大篇幅或全文引用了参考文献,若出现此情况的,将在相关内容的开头予以说明,部分引用参考文献的将在文件结尾的参考链接中注明。
Kernel的相关知识以及一些实例在Kernel中的利用已经在Kernel Pwn 学习之路(一)(二)
给予了说明
Kernel中内存管理的相关知识已经在Kernel Pwn 学习之路(三)
给予了说明
本文主要接续Kernel Pwn 学习之路(四)
,继续研究内核中断的相关机制。本文涉及到的所有Linux Kernel
相关代码均基于5.6.2
版本。
限于篇幅的原因,本文仅介绍了异常中断前处理,下一篇文章将深入中断服务函数,介绍其内部实现~
【传送门】:Kernel Pwn 学习之路(一)
【传送门】:Kernel Pwn 学习之路(二)
【传送门】:Kernel Pwn 学习之路(三)
【传送门】:Kernel Pwn 学习之路(四)
0x02 通用内核代码中的IDT相关处理
在上一篇文章的分析中,处理机进入了保护模式以及长模式,在平台相关代码中完成了IDT
的初始化。在那之后流程将转移到通用内核代码,接下来我们进行分析通用内核代码中的IDT相关处理代码。
入口函数在/source/init/main.c
中实现(这里省略不分析的函数),这个函数将完成内核以pid - 1
运行第一个init
进程 之前的所有初始化工作。
asmlinkage __visible void __init start_kernel(void)
{
char *command_line;
char *after_dashes;
......
local_irq_disable(); // Line 12
early_boot_irqs_disabled = true;
/*
* Interrupts are still disabled. Do necessary setups, then
* enable them.
*/
......
setup_arch(&command_line); // Line 23
......
boot_init_stack_canary(); // Line 123
......
early_boot_irqs_disabled = false;
local_irq_enable(); // Line 133
......
}
为中断栈设置Stack Canary
在start_kernel()
的line 123
调用了boot_init_stack_canary()
来设置canary值来缓解中断栈溢出。
此函数在/source/arch/x86/include/asm/stackprotector.h#L61
处实现
/* SPDX-License-Identifier: GPL-2.0 */
/*
* GCC stack protector support.
*
* Stack protector works by putting predefined pattern at the start of
* the stack frame and verifying that it hasn't been overwritten when
* returning from the function. The pattern is called stack canary
* and unfortunately gcc requires it to be at a fixed offset from %gs.
* On x86_64, the offset is 40 bytes and on x86_32 20 bytes. x86_64
* and x86_32 use segment registers differently and thus handles this
* requirement differently.
*
* On x86_64, %gs is shared by percpu area and stack canary. All
* percpu symbols are zero based and %gs points to the base of percpu
* area. The first occupant of the percpu area is always
* fixed_percpu_data which contains stack_canary at offset 40. Userland
* %gs is always saved and restored on kernel entry and exit using
* swapgs, so stack protector doesn't add any complexity there.
*
* On x86_32, it's slightly more complicated. As in x86_64, %gs is
* used for userland TLS. Unfortunately, some processors are much
* slower at loading segment registers with different value when
* entering and leaving the kernel, so the kernel uses %fs for percpu
* area and manages %gs lazily so that %gs is switched only when
* necessary, usually during task switch.
*
* As gcc requires the stack canary at %gs:20, %gs can't be managed
* lazily if stack protector is enabled, so the kernel saves and
* restores userland %gs on kernel entry and exit. This behavior is
* controlled by CONFIG_X86_32_LAZY_GS and accessors are defined in
* system.h to hide the details.
*/
#ifndef _ASM_STACKPROTECTOR_H
#define _ASM_STACKPROTECTOR_H 1
#ifdef CONFIG_STACKPROTECTOR
#include <asm/tsc.h>
#include <asm/processor.h>
#include <asm/percpu.h>
#include <asm/desc.h>
#include <linux/random.h>
#include <linux/sched.h>
/*
* 24 byte read-only segment initializer for stack canary. Linker
* can't handle the address bit shifting. Address will be set in
* head_32 for boot CPU and setup_per_cpu_areas() for others.
*/
#define GDT_STACK_CANARY_INIT \
[GDT_ENTRY_STACK_CANARY] = GDT_ENTRY_INIT(0x4090, 0, 0x18),
/*
* Initialize the stackprotector canary value.
*
* NOTE: this must only be called from functions that never return,
* and it must always be inlined.
*/
static __always_inline void boot_init_stack_canary(void)
{
u64 canary;
u64 tsc;
/*
* 如果设置了内核配置选项 CONFIG_X86_64 ,那么一开始将检查结构体 fixed_percpu_data 的状态
* 这个结构体代表了 per-cpu 中断栈,其与 stack_canary 值中间有 40 个字节的 offset
*/
#ifdef CONFIG_X86_64
BUILD_BUG_ON(offsetof(struct fixed_percpu_data, stack_canary) != 40);
#endif
/*
* We both use the random pool and the current TSC as a source
* of randomness. The TSC only matters for very early init,
* there it already has some randomness on most systems. Later
* on during the bootup the random pool has true entropy too.
* 使用随机数和时戳计数器计算新的 canary 值
*/
get_random_bytes(&canary, sizeof(canary));
tsc = rdtsc();
canary += tsc + (tsc << 32UL);
canary &= CANARY_MASK;
current->stack_canary = canary;
#ifdef CONFIG_X86_64
// 通过 this_cpu_write 宏将 canary 值写入了 fixed_percpu_data 中:
this_cpu_write(fixed_percpu_data.stack_canary, canary);
#else
this_cpu_write(stack_canary.canary, canary);
#endif
}
......
#else /* STACKPROTECTOR */
......
#endif /* _ASM_STACKPROTECTOR_H */
它的实现取决于 CONFIG_STACKPROTECTOR
这个内核配置选项。如果该选项没有置位,那该函数将是一个空函数。
禁用/启用本地中断
在start_kernel()
的line 12
调用了local_irq_disable()
来禁用本地中断。
在start_kernel()
的line 133
调用了local_irq_enable()
来启用本地中断。
local_irq_enable()
是一个宏定义,它定义在/source/include/linux/irqflags.h#L109
local_irq_disable()
是一个宏定义,它定义在/source/include/linux/irqflags.h#L111
/*
* The local_irq_*() APIs are equal to the raw_local_irq*()
* if !TRACE_IRQFLAGS.
*/
#ifdef CONFIG_TRACE_IRQFLAGS
#define local_irq_enable() \
do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)
#define local_irq_disable() \
do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0)
......
#else /* !CONFIG_TRACE_IRQFLAGS */
#define local_irq_enable() do { raw_local_irq_enable(); } while (0)
#define local_irq_disable() do { raw_local_irq_disable(); } while (0)
......
#endif /* CONFIG_TRACE_IRQFLAGS */
当 CONFIG_TRACE_IRQFLAGS_SUPPORT
选项置位时, local_irq_*
宏将同时调用 trace_hardirqs_*
函数。在Linux死锁检测模块lockdep中有一项功能 irq-flags tracing
,它可以追踪 hardirq
和 softirq
的状态。在这种情况下, lockdep
死锁检测模块可以提供系统中关于硬/软中断的开/关事件的相关信息。
函数 trace_hardirqs_*
的定义位于/source/kernel/trace/trace_preemptirq.c#L22
void trace_hardirqs_on(void)
{
if (this_cpu_read(tracing_irq_cpu)) {
if (!in_nmi())
trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1);
this_cpu_write(tracing_irq_cpu, 0);
}
lockdep_hardirqs_on(CALLER_ADDR0);
}
EXPORT_SYMBOL(trace_hardirqs_on);
NOKPROBE_SYMBOL(trace_hardirqs_on);
void trace_hardirqs_off(void)
{
if (!this_cpu_read(tracing_irq_cpu)) {
this_cpu_write(tracing_irq_cpu, 1);
tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1);
if (!in_nmi())
trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
}
lockdep_hardirqs_off(CALLER_ADDR0);
}
EXPORT_SYMBOL(trace_hardirqs_off);
NOKPROBE_SYMBOL(trace_hardirqs_off);
可见它只是调用了 lockdep_hardirqs_*
函数。 lockdep_hardirqs_*
函数,该函数检查了当前进程的 hardirqs_enabled
域,如果本次 local_irq_disable
调用是冗余的话,便使 redundant_hardirqs_off
域的值增长,否则便使 hardirqs_off_events
域的值增加。这两个域或其它与死锁检测模块 lockdep
统计相关的域定义在/source/kernel/locking/lockdep_internals.h#L168
处的 lockdep_stats
结构体中:
/*
* Various lockdep statistics.
* We want them per cpu as they are often accessed in fast path
* and we want to avoid too much cache bouncing.
*/
struct lockdep_stats {
unsigned long chain_lookup_hits;
unsigned int chain_lookup_misses;
unsigned long hardirqs_on_events;
unsigned long hardirqs_off_events;
unsigned long redundant_hardirqs_on;
unsigned long redundant_hardirqs_off;
unsigned long softirqs_on_events;
unsigned long softirqs_off_events;
unsigned long redundant_softirqs_on;
unsigned long redundant_softirqs_off;
int nr_unused_locks;
unsigned int nr_redundant_checks;
unsigned int nr_redundant;
unsigned int nr_cyclic_checks;
unsigned int nr_find_usage_forwards_checks;
unsigned int nr_find_usage_backwards_checks;
/*
* Per lock class locking operation stat counts
*/
unsigned long lock_class_ops[MAX_LOCKDEP_KEYS];
};
如果开启了 CONFIG_DEBUG_LOCKDEP
内核配置选项,lockdep_stats_debug_show
函数会将所有的调试信息写入 /proc/lockdep
文件中。
接下来来分析 raw_local_irq_disable
,这个宏定义在/source/include/linux/irqflags.h#L79
处实现,其展开后的样子是:
/*
* Wrap the arch provided IRQ routines to provide appropriate checks.
*/
#define raw_local_irq_disable() arch_local_irq_disable()
#define raw_local_irq_enable() arch_local_irq_enable()
// In /source/arch/x86/include/asm/irqflags.h#L87
static inline notrace void arch_local_irq_disable(void)
{
native_irq_disable();
}
static inline notrace void arch_local_irq_enable(void)
{
native_irq_enable();
}
// In /source/arch/x86/include/asm/irqflags.h#L47
static inline void native_irq_disable(void)
{
asm volatile("cli": : :"memory");
}
static inline void native_irq_enable(void)
{
asm volatile("sti": : :"memory");
}
cli/sti
指令将清除/设置IF标志位,这个标志位控制着处理器是否响应中断或异常。
早期版本的内核中提供了一个叫做 cli
的函数来禁用所有处理器的中断,该函数已经被移除,替代它的是 local_irq_{enabled,disable}
宏,用于禁用或启用当前处理器的中断。我们在调用 local_irq_disable
宏禁用中断以后,接着设置了变量值:
early_boot_irqs_disabled = true;
变量 early_boot_irqs_disabled
定义在文件/source/include/linux/kernel.h
中:
extern bool early_boot_irqs_disabled;
并在另外的地方使用。例如在/source/kernel/smp.c
中的 smp_call_function_many
函数中,通过这个变量来检查当前是否由于中断禁用而处于死锁状态:
WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
&& !oops_in_progress && !early_boot_irqs_disabled);
0 条评论