找不到访问 percpu 变量时所用的、看似位于堆栈中的抢占计数是在哪里声明的。(Linux 内核)
Can't find where preempt count in the stack is declared for a percpu variable access. (linux kernel)
我正在调试 Linux 的启动过程,并试图了解 percpu
变量在 arm64 上是如何工作的。为了测试,我添加了一个名为 read_pkcontext1
的函数,它返回 percpu 变量 printk_context
的值(该值由 printk 使用),结果发现了一些我无法理解的东西。
(这是来自 linux 5.4.21)
==== kernel/printk/printk_safe.c ====
int read_pkcontext1(void) /* function I added for test */
{
return this_cpu_read(printk_context);
}
==== include/linux/percpu-defs.h ====
/*
* Operations with implied preemption/interrupt protection. These
* operations can be used without worrying about preemption or interrupt.
*/
#define this_cpu_read(pcp) __pcpu_size_call_return(this_cpu_read_, pcp)
==== include/linux/percpu-defs.h ====
#define __pcpu_size_call_return(stem, variable) \
({ \
typeof(variable) pscr_ret__; \
__verify_pcpu_ptr(&(variable)); \
switch(sizeof(variable)) { \
case 1: pscr_ret__ = stem##1(variable); break; \
case 2: pscr_ret__ = stem##2(variable); break; \
case 4: pscr_ret__ = stem##4(variable); break; \
case 8: pscr_ret__ = stem##8(variable); break; \
default: \
__bad_size_call_parameter(); break; \
} \
pscr_ret__; \
})
下面是用 aarch64-none-elf-objdump -S vmlinux
反汇编得到的 read_pkcontext1
函数及其内部调用的函数(已关闭优化)。
ffffffc0100f0dc0 <read_pkcontext1>:
void write_pkcontext(void);
#pragma GCC push_options
#pragma GCC optimize ("O0")
int read_pkcontext1(void)
{
ffffffc0100f0dc0: a9bd7bfd stp x29, x30, [sp, #-48]!
ffffffc0100f0dc4: 910003fd mov x29, sp
return this_cpu_read(printk_context);
ffffffc0100f0dc8: f9000fff str xzr, [sp, #24]
ffffffc0100f0dcc: 52800020 mov w0, #0x1 // #1
ffffffc0100f0dd0: 94000018 bl ffffffc0100f0e30 <__preempt_count_add>
... skip ...
ffffffc0100f0e30 <__preempt_count_add>:
ffffffc0100f0e30: d5384101 mrs x1, sp_el0
ffffffc0100f0e34: b9401022 ldr w2, [x1, #16]
pc += val;
ffffffc0100f0e38: 0b020000 add w0, w0, w2
case 4: *(volatile __u32 *)p = *(__u32 *)res; break;
ffffffc0100f0e3c: b9001020 str w0, [x1, #16]
}
ffffffc0100f0e40: d65f03c0 ret
在上面的代码中,它以 w0 = #1(即把抢占计数加 1)调用 __preempt_count_add,而 __preempt_count_add 函数把该值(w0)加到位于 sp + #16 处的变量上并写回。所以堆栈中的这个变量看起来就是抢占计数(我想这是为了防止抢占)。我的问题是:堆栈中的这个值是在什么时候定义和初始化的?我在 Linux 源码中找不到它。(使用 qemu 时,我看到这个值原本是 1,经过 __preempt_count_add 后递增到 2;当然,访问完 percpu 变量之后它又递减回 1。)
this_cpu_read(printk_context)
会被宏展开为:
⇒ __pcpu_size_call_return(this_cpu_read_, printk_context)
⇒
({
typeof(printk_context) pscr_ret__;
__verify_pcpu_ptr(&(printk_context));
switch(sizeof(printk_context)) {
case 1: pscr_ret__ = this_cpu_read_1(printk_context); break;
case 2: pscr_ret__ = this_cpu_read_2(printk_context); break;
case 4: pscr_ret__ = this_cpu_read_4(printk_context); break;
case 8: pscr_ret__ = this_cpu_read_8(printk_context); break;
default:
__bad_size_call_parameter(); break;
}
pscr_ret__;
})
sizeof(printk_context)
是 4,所以实际执行的是 pscr_ret__ = this_cpu_read_4(printk_context);
。
this_cpu_read_4()
宏由 #include <asm/percpu.h>
定义:
==== arch/arm64/include/asm/percpu.h ====
#define this_cpu_read_4(pcp) \
_pcp_protect_return(__percpu_read_32, pcp)
#define _pcp_protect_return(op, pcp, args...) \
({ \
typeof(pcp) __retval; \
preempt_disable_notrace(); \
__retval = (typeof(pcp))op(raw_cpu_ptr(&(pcp)), ##args); \
preempt_enable_notrace(); \
__retval; \
})
这就是发生抢占计数操作的地方。
preempt_disable_notrace()
和 preempt_enable_notrace()
宏由 #include <linux/preempt.h>
定义。
==== include/linux/preempt.h ====
#define preempt_enable_notrace() \
do { \
barrier(); \
__preempt_count_dec(); \
} while (0)
#define preempt_disable_notrace() \
do { \
__preempt_count_inc(); \
barrier(); \
} while (0)
#define __preempt_count_inc() __preempt_count_add(1)
#define __preempt_count_dec() __preempt_count_sub(1)
__preempt_count_add()
和 __preempt_count_sub()
由 #include <asm/preempt.h>
定义。
==== arch/arm64/include/asm/preempt.h ====
static inline void __preempt_count_add(int val)
{
u32 pc = READ_ONCE(current_thread_info()->preempt.count);
pc += val;
WRITE_ONCE(current_thread_info()->preempt.count, pc);
}
static inline void __preempt_count_sub(int val)
{
u32 pc = READ_ONCE(current_thread_info()->preempt.count);
pc -= val;
WRITE_ONCE(current_thread_info()->preempt.count, pc);
}
对于 arm64,CONFIG_THREAD_INFO_IN_TASK
已启用,因此 current_thread_info()
由 #include <linux/thread_info.h>
定义为宏。
==== include/linux/thread_info.h ====
#ifdef CONFIG_THREAD_INFO_IN_TASK
/*
* For CONFIG_THREAD_INFO_IN_TASK kernels we need <asm/current.h> for the
* definition of current, but for !CONFIG_THREAD_INFO_IN_TASK kernels,
* including <asm/current.h> can cause a circular dependency on some platforms.
*/
#include <asm/current.h>
#define current_thread_info() ((struct thread_info *)current)
#endif
current
宏由 #include <asm/current.h>
定义。
==== arch/arm64/include/asm/current.h ====
#define current get_current()
/*
* We don't use read_sysreg() as we want the compiler to cache the value where
* possible.
*/
static __always_inline struct task_struct *get_current(void)
{
unsigned long sp_el0;
asm ("mrs %0, sp_el0" : "=r" (sp_el0));
return (struct task_struct *)sp_el0;
}
arch/arm64/kernel/entry.S 中有一些“魔法”,让 sp_el0
堆栈指针寄存器指向当前的 thread_info / task_struct。抱歉,我没有时间深入研究其中的细节,但这一做法是由 commit 6cdf9c7ca687 ("arm64: Store struct thread_info in sp_el0") 引入的。
关键在于 sp_el0
寄存器与 sp
不是同一个寄存器。内核不在 EL0 模式下运行,因此 sp_el0
可以当作“临时”寄存器使用。内核用它来指向当前的 thread_info / task_struct。
struct task_struct
由 #include <linux/sched.h>
定义。
==== include/linux/sched.h ====
struct task_struct {
#ifdef CONFIG_THREAD_INFO_IN_TASK
/*
* For reasons of header soup (see current_thread_info()), this
* must be the first element of task_struct.
*/
struct thread_info thread_info;
#endif
/* -1 unrunnable, 0 runnable, >0 stopped: */
volatile long state;
由于启用了 CONFIG_THREAD_INFO_IN_TASK
,task_struct 的第一个成员就是 struct thread_info thread_info
。current_thread_info()
指向的正是当前任务中的这个成员。
struct thread_info
由 #include <asm/thread_info.h>
定义。
==== arch/arm64/include/asm/thread_info.h ====
/*
* low level task data that entry.S needs immediate access to.
*/
struct thread_info {
unsigned long flags; /* low level flags */
mm_segment_t addr_limit; /* address limit */
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
u64 ttbr0; /* saved TTBR0_EL1 */
#endif
union {
u64 preempt_count; /* 0 => preemptible, <0 => bug */
struct {
#ifdef CONFIG_CPU_BIG_ENDIAN
u32 need_resched;
u32 count;
#else
u32 count;
u32 need_resched;
#endif
} preempt;
};
};
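当未选择 CONFIG_ARM64_SW_TTBR0_PAN
且 CPU 为 little-endian 时,flags 和 addr_limit 各占 8 字节,因此 preempt.count
成员位于距结构体开头偏移 16 的位置。这正对应反汇编中的 ldr w2, [x1, #16]:x1 的值来自 sp_el0(指向当前的 task_struct),而不是堆栈指针 sp,所以抢占计数并不在堆栈中。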
我正在调试 Linux 的启动过程,并试图了解 percpu
变量在 arm64 上是如何工作的。为了测试,我添加了一个名为 read_pkcontext1
的函数,它返回 percpu 变量 printk_context
的值(该值由 printk 使用),结果发现了一些我无法理解的东西。
(这是来自 linux 5.4.21)
==== kernel/printk/printk_safe.c ====
int read_pkcontext1(void) /* function I added for test */
{
return this_cpu_read(printk_context);
}
==== include/linux/percpu-defs.h ====
/*
* Operations with implied preemption/interrupt protection. These
* operations can be used without worrying about preemption or interrupt.
*/
#define this_cpu_read(pcp) __pcpu_size_call_return(this_cpu_read_, pcp)
==== include/linux/percpu-defs.h ====
#define __pcpu_size_call_return(stem, variable) \
({ \
typeof(variable) pscr_ret__; \
__verify_pcpu_ptr(&(variable)); \
switch(sizeof(variable)) { \
case 1: pscr_ret__ = stem##1(variable); break; \
case 2: pscr_ret__ = stem##2(variable); break; \
case 4: pscr_ret__ = stem##4(variable); break; \
case 8: pscr_ret__ = stem##8(variable); break; \
default: \
__bad_size_call_parameter(); break; \
} \
pscr_ret__; \
})
下面是用 aarch64-none-elf-objdump -S vmlinux
反汇编得到的 read_pkcontext1
函数及其内部调用的函数(已关闭优化)。
ffffffc0100f0dc0 <read_pkcontext1>:
void write_pkcontext(void);
#pragma GCC push_options
#pragma GCC optimize ("O0")
int read_pkcontext1(void)
{
ffffffc0100f0dc0: a9bd7bfd stp x29, x30, [sp, #-48]!
ffffffc0100f0dc4: 910003fd mov x29, sp
return this_cpu_read(printk_context);
ffffffc0100f0dc8: f9000fff str xzr, [sp, #24]
ffffffc0100f0dcc: 52800020 mov w0, #0x1 // #1
ffffffc0100f0dd0: 94000018 bl ffffffc0100f0e30 <__preempt_count_add>
... skip ...
ffffffc0100f0e30 <__preempt_count_add>:
ffffffc0100f0e30: d5384101 mrs x1, sp_el0
ffffffc0100f0e34: b9401022 ldr w2, [x1, #16]
pc += val;
ffffffc0100f0e38: 0b020000 add w0, w0, w2
case 4: *(volatile __u32 *)p = *(__u32 *)res; break;
ffffffc0100f0e3c: b9001020 str w0, [x1, #16]
}
ffffffc0100f0e40: d65f03c0 ret
在上面的代码中,它以 w0 = #1(即把抢占计数加 1)调用 __preempt_count_add,而 __preempt_count_add 函数把该值(w0)加到位于 sp + #16 处的变量上并写回。所以堆栈中的这个变量看起来就是抢占计数(我想这是为了防止抢占)。我的问题是:堆栈中的这个值是在什么时候定义和初始化的?我在 Linux 源码中找不到它。(使用 qemu 时,我看到这个值原本是 1,经过 __preempt_count_add 后递增到 2;当然,访问完 percpu 变量之后它又递减回 1。)
this_cpu_read(printk_context)
会被宏展开为:
⇒ __pcpu_size_call_return(this_cpu_read_, printk_context)
⇒
({
typeof(printk_context) pscr_ret__;
__verify_pcpu_ptr(&(printk_context));
switch(sizeof(printk_context)) {
case 1: pscr_ret__ = this_cpu_read_1(printk_context); break;
case 2: pscr_ret__ = this_cpu_read_2(printk_context); break;
case 4: pscr_ret__ = this_cpu_read_4(printk_context); break;
case 8: pscr_ret__ = this_cpu_read_8(printk_context); break;
default:
__bad_size_call_parameter(); break;
}
pscr_ret__;
})
sizeof(printk_context)
是 4,所以实际执行的是 pscr_ret__ = this_cpu_read_4(printk_context);
。
this_cpu_read_4()
宏由 #include <asm/percpu.h>
定义:
==== arch/arm64/include/asm/percpu.h ====
#define this_cpu_read_4(pcp) \
_pcp_protect_return(__percpu_read_32, pcp)
#define _pcp_protect_return(op, pcp, args...) \
({ \
typeof(pcp) __retval; \
preempt_disable_notrace(); \
__retval = (typeof(pcp))op(raw_cpu_ptr(&(pcp)), ##args); \
preempt_enable_notrace(); \
__retval; \
})
这就是发生抢占计数操作的地方。
preempt_disable_notrace()
和 preempt_enable_notrace()
宏由 #include <linux/preempt.h>
定义。
==== include/linux/preempt.h ====
#define preempt_enable_notrace() \
do { \
barrier(); \
__preempt_count_dec(); \
} while (0)
#define preempt_disable_notrace() \
do { \
__preempt_count_inc(); \
barrier(); \
} while (0)
#define __preempt_count_inc() __preempt_count_add(1)
#define __preempt_count_dec() __preempt_count_sub(1)
__preempt_count_add()
和 __preempt_count_sub()
由 #include <asm/preempt.h>
定义。
==== arch/arm64/include/asm/preempt.h ====
static inline void __preempt_count_add(int val)
{
u32 pc = READ_ONCE(current_thread_info()->preempt.count);
pc += val;
WRITE_ONCE(current_thread_info()->preempt.count, pc);
}
static inline void __preempt_count_sub(int val)
{
u32 pc = READ_ONCE(current_thread_info()->preempt.count);
pc -= val;
WRITE_ONCE(current_thread_info()->preempt.count, pc);
}
对于 arm64,CONFIG_THREAD_INFO_IN_TASK
已启用,因此 current_thread_info()
由 #include <linux/thread_info.h>
定义为宏。
==== include/linux/thread_info.h ====
#ifdef CONFIG_THREAD_INFO_IN_TASK
/*
* For CONFIG_THREAD_INFO_IN_TASK kernels we need <asm/current.h> for the
* definition of current, but for !CONFIG_THREAD_INFO_IN_TASK kernels,
* including <asm/current.h> can cause a circular dependency on some platforms.
*/
#include <asm/current.h>
#define current_thread_info() ((struct thread_info *)current)
#endif
current
宏由 #include <asm/current.h>
定义。
==== arch/arm64/include/asm/current.h ====
#define current get_current()
/*
* We don't use read_sysreg() as we want the compiler to cache the value where
* possible.
*/
static __always_inline struct task_struct *get_current(void)
{
unsigned long sp_el0;
asm ("mrs %0, sp_el0" : "=r" (sp_el0));
return (struct task_struct *)sp_el0;
}
arch/arm64/kernel/entry.S 中有一些“魔法”,让 sp_el0
堆栈指针寄存器指向当前的 thread_info / task_struct。抱歉,我没有时间深入研究其中的细节,但这一做法是由 commit 6cdf9c7ca687 ("arm64: Store struct thread_info in sp_el0") 引入的。
关键在于 sp_el0
寄存器与 sp
不是同一个寄存器。内核不在 EL0 模式下运行,因此 sp_el0
可以当作“临时”寄存器使用。内核用它来指向当前的 thread_info / task_struct。
struct task_struct
由 #include <linux/sched.h>
定义。
==== include/linux/sched.h ====
struct task_struct {
#ifdef CONFIG_THREAD_INFO_IN_TASK
/*
* For reasons of header soup (see current_thread_info()), this
* must be the first element of task_struct.
*/
struct thread_info thread_info;
#endif
/* -1 unrunnable, 0 runnable, >0 stopped: */
volatile long state;
由于启用了 CONFIG_THREAD_INFO_IN_TASK
,task_struct 的第一个成员就是 struct thread_info thread_info
。current_thread_info()
指向的正是当前任务中的这个成员。
struct thread_info
由 #include <asm/thread_info.h>
定义。
==== arch/arm64/include/asm/thread_info.h ====
/*
* low level task data that entry.S needs immediate access to.
*/
struct thread_info {
unsigned long flags; /* low level flags */
mm_segment_t addr_limit; /* address limit */
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
u64 ttbr0; /* saved TTBR0_EL1 */
#endif
union {
u64 preempt_count; /* 0 => preemptible, <0 => bug */
struct {
#ifdef CONFIG_CPU_BIG_ENDIAN
u32 need_resched;
u32 count;
#else
u32 count;
u32 need_resched;
#endif
} preempt;
};
};
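当未选择 CONFIG_ARM64_SW_TTBR0_PAN
且 CPU 为 little-endian 时,flags 和 addr_limit 各占 8 字节,因此 preempt.count
成员位于距结构体开头偏移 16 的位置。这正对应反汇编中的 ldr w2, [x1, #16]:x1 的值来自 sp_el0(指向当前的 task_struct),而不是堆栈指针 sp,所以抢占计数并不在堆栈中。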