如何让 GCC 不删除易失性指针的读取?
How do I get GCC to not delete reads from volatile pointers?
我的源文件中有以下代码:
/* Question's fragment: allocate an area, read it word by word, release it. */
void *hardware = AllocateHardwareArea(SIZE);
/* The pointee is volatile-qualified; the cast itself names plain uint32_t. */
volatile uint32_t *reader = (uint32_t *) hardware;
unsigned x;
/* NOTE(review): u32 is not declared in this fragment — presumably the same width as uint32_t; confirm. */
for (x = 0; x < SIZE / sizeof(u32); ++x)
(void) *reader++; /* read through the volatile pointer, discard the value, advance */
ReleaseHardwareArea(hardware);
但是当我使用 -O3 在面向 ARMv6 的 GCC 4.9.2 上编译它时,编译器正在从汇编语言输出中删除整个 for
循环:
STMFD SP!, {R3,LR}
MOV R0, #0
MOV R1, #0x10000
BL AllocateHardwareArea
LDMFD SP!, {R3,LR}
B ReleaseHardwareArea
难道 volatile
不正是用于这种硬件寄存器的情况吗?
我无法使用 GCC-4.9.3 重现您的结果(gcc-arm-none-eabi-4.9.3.2015q2-1trusty1
来自 Terry Guo 的 PPA,适用于 x86_64 上的 Ubuntu 14.04.2 LTS)。从下面的 file.c
开始:
/* Allocation and release routines are defined elsewhere; only prototypes are given. */
void *AllocateHardwareArea(const unsigned int);
void ReleaseHardwareArea(void *);
/*
 * test - allocate an area, read every unsigned int in it, then release it.
 * size: value passed to AllocateHardwareArea(); also determines the read count.
 */
void test(const unsigned int size)
{
void *hardware = AllocateHardwareArea(size);
/* Pointee is volatile-qualified, so the reads below must not be optimized away. */
volatile unsigned int *reader = hardware;
unsigned int x;
/* Iterates size / sizeof *reader times, i.e. once per whole unsigned int. */
for (x = 0; x < size / sizeof *reader; x++)
(void)*reader++; /* read and discard the value, then advance the pointer */
ReleaseHardwareArea(hardware);
}
使用 arm-none-eabi-gcc-4.9.3 -march=armv6 -mtune=arm6 -O3 -S file.c
编译得到以下汇编代码:
@ Compiler-generated listing for test() from file.c (ARMv6, GNU as syntax).
.arch armv6
.fpu softvfp
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 1
.eabi_attribute 30, 2
.eabi_attribute 34, 1
.eabi_attribute 18, 4
.file "file.c"
.text
.align 2
.global test
.type test, %function
test:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
stmfd sp!, {r4, lr}          @ save r4 and the return address
mov r4, r0                   @ r4 = size, kept across the call
bl AllocateHardwareArea      @ r0 = hardware
movs r2, r4, lsr #2          @ r2 = size >> 2 (word count); sets Z when it is zero
beq .L2                      @ zero words: skip the read loop
mov r3, r0                   @ r3 = reader
add r2, r0, r2, asl #2       @ r2 = hardware + count * 4 (end pointer)
.L3:
ldr r1, [r3]                 @ the volatile read; the loaded value is never used
add r3, r3, #4               @ reader++
cmp r3, r2
bne .L3                      @ repeat until reader reaches the end pointer
.L2:
ldmfd sp!, {r4, lr}          @ restore r4 and lr
b ReleaseHardwareArea        @ tail call; r0 still holds hardware
.size test, .-test
或者,使用arm-none-eabi-gcc-4.9.3 -march=armv6 -mtune=arm6 -O3 -c file.c
编译为目标代码,使用arm-none-eabi-objdump -d file.o
反汇编为
file.o: file format elf32-littlearm
Disassembly of section .text:
00000000 <test>:
0: e92d4010 push {r4, lr}
4: e1a04000 mov r4, r0
8: ebfffffe bl 0 <AllocateHardwareArea>
c: e1b02124 lsrs r2, r4, #2
10: 0a000005 beq 2c <test+0x2c>
14: e1a03000 mov r3, r0
18: e0802102 add r2, r0, r2, lsl #2
1c: e5931000 ldr r1, [r3]
20: e2833004 add r3, r3, #4
24: e1530002 cmp r3, r2
28: 1afffffb bne 1c <test+0x1c>
2c: e8bd4010 pop {r4, lr}
30: eafffffe b 0 <ReleaseHardwareArea>
分配的区域按 unsigned int
大小的单位被逐个读取,这正是预期的行为。在汇编源代码中,读取循环位于标签 .L3
和 .L2
之间。在目标代码中,读取循环位于地址 1c
..28
。
编辑添加:Olaf 在评论中指出,OP 可能使用常量 size
。让我们也检查一下这个案例:
/* Allocation and release routines are defined elsewhere; only prototypes are given. */
void *AllocateHardwareArea(const unsigned int);
void ReleaseHardwareArea(void *);
/* Compile-time constant passed to the allocator; also fixes the loop trip count. */
#define SIZE 32
/* test - same read loop as before, but with a constant area size. */
void test(void)
{
void *hardware = AllocateHardwareArea(SIZE);
/* Pointee is volatile-qualified, so the reads below must not be optimized away. */
volatile unsigned int *reader = hardware;
unsigned int x;
/* Trip count SIZE / sizeof *reader is a constant expression here. */
for (x = 0; x < SIZE / sizeof *reader; x++)
(void)*reader++; /* read and discard the value, then advance the pointer */
ReleaseHardwareArea(hardware);
}
对应的汇编代码是
@ Compiler-generated listing for test() from file2.c (SIZE = 32): the loop is fully unrolled.
.arch armv6
.fpu softvfp
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 1
.eabi_attribute 30, 2
.eabi_attribute 34, 1
.eabi_attribute 18, 4
.file "file2.c"
.text
.align 2
.global test
.type test, %function
test:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
stmfd sp!, {r3, lr}          @ save lr; r3 is pushed too — presumably only for stack alignment (TODO confirm)
mov r0, #32                  @ r0 = SIZE
bl AllocateHardwareArea      @ r0 = hardware
mov r3, r0                   @ r3 = copy of hardware
ldr r2, [r0]                 @ eight word reads follow, at byte offsets 0..28
ldr r2, [r0, #4]
ldr r2, [r0, #8]
ldr r2, [r0, #12]
ldr r2, [r0, #16]
ldr r2, [r0, #20]
ldr r2, [r0, #24]
ldr r3, [r3, #28]            @ last read goes through the r3 copy
ldmfd sp!, {r3, lr}          @ restore lr (and reload r3)
b ReleaseHardwareArea        @ tail call; r0 still holds hardware
.size test, .-test
.ident "GCC: (GNU Tools for ARM Embedded Processors) 4.9.3 20150529 (release) [ARM/embedded-4_9-branch revision 224288]"
和目标代码的反汇编
00000000 <test>:
0: e92d4008 push {r3, lr}
4: e3a00020 mov r0, #32
8: ebfffffe bl 0 <AllocateHardwareArea>
c: e1a03000 mov r3, r0
10: e5902000 ldr r2, [r0]
14: e5902004 ldr r2, [r0, #4]
18: e5902008 ldr r2, [r0, #8]
1c: e590200c ldr r2, [r0, #12]
20: e5902010 ldr r2, [r0, #16]
24: e5902014 ldr r2, [r0, #20]
28: e5902018 ldr r2, [r0, #24]
2c: e593301c ldr r3, [r3, #28]
30: e8bd4008 pop {r3, lr}
34: eafffffe b 0 <ReleaseHardwareArea>
也就是说,循环只是被展开了。当然,如果 SIZE
小于 4,循环次数为零,整个循环就会被优化掉。当 SIZE <= 71
时循环会被展开。对于 SIZE = 72
,目标代码是
00000000 <test>:
0: e92d4008 push {r3, lr}
4: e3a00048 mov r0, #72 ; 0x48
8: ebfffffe bl 0 <AllocateHardwareArea>
c: e1a03000 mov r3, r0
10: e2802048 add r2, r0, #72 ; 0x48
14: e5931000 ldr r1, [r3]
18: e2833004 add r3, r3, #4
1c: e1530002 cmp r3, r2
20: 1afffffb bne 14 <test+0x14>
24: e8bd4008 pop {r3, lr}
28: eafffffe b 0 <ReleaseHardwareArea>
由于您使用的是激进的优化级别 (-O3
) 进行编译,我建议重写您的代码片段,尽量多地加上 const
,而不要假定编译器会自动识别出常量。例如,使用与上面相同的命令,以下版本
/* Allocation and release routines are defined elsewhere; only prototypes are given. */
void *AllocateHardwareArea(const unsigned int);
void ReleaseHardwareArea(void *);
/*
 * test - const-qualified variant: allocate an area, read every unsigned int
 * in it by index, then release it.
 * size: value passed to AllocateHardwareArea(); also determines the read count.
 */
void test(const unsigned int size)
{
void *const hardware = AllocateHardwareArea(size);
/* The pointer itself is const; the pointee stays volatile-qualified. */
volatile unsigned int *const reader = hardware;
/* Element count is computed once, before the loop. */
const unsigned int n = size / sizeof *reader;
unsigned int i;
for (i = 0; i < n; i++)
reader[i]; /* expression statement whose only effect is the volatile read */
ReleaseHardwareArea(hardware);
}
执行完全相同的任务,但内层循环中少了一条指令。汇编代码是
@ Compiler-generated listing for test() from new.c: load and pointer advance share one instruction.
.arch armv6
.fpu softvfp
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 1
.eabi_attribute 30, 2
.eabi_attribute 34, 1
.eabi_attribute 18, 4
.file "new.c"
.text
.align 2
.global test
.type test, %function
test:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
stmfd sp!, {r4, lr}          @ save r4 and the return address
mov r4, r0                   @ r4 = size, kept across the call
bl AllocateHardwareArea      @ r0 = hardware
movs r2, r4, lsr #2          @ r2 = n = size >> 2; sets Z when it is zero
beq .L2                      @ n == 0: skip the read loop
mov r3, r0                   @ r3 = running read pointer
add r2, r0, r2, asl #2       @ r2 = hardware + n * 4 (end pointer)
.L3:
ldr r1, [r3], #4             @ volatile read with post-increment: load, then r3 += 4
cmp r3, r2
bne .L3                      @ repeat until the pointer reaches the end
.L2:
ldmfd sp!, {r4, lr}          @ restore r4 and lr
b ReleaseHardwareArea        @ tail call; r0 still holds hardware
.size test, .-test
.ident "GCC: (GNU Tools for ARM Embedded Processors) 4.9.3 20150529 (release) [ARM/embedded-4_9-branch revision 224288]"
和目标代码
Disassembly of section .text:
00000000 <test>:
0: e92d4010 push {r4, lr}
4: e1a04000 mov r4, r0
8: ebfffffe bl 0 <AllocateHardwareArea>
c: e1b02124 lsrs r2, r4, #2
10: 0a000004 beq 28 <test+0x28>
14: e1a03000 mov r3, r0
18: e0802102 add r2, r0, r2, lsl #2
1c: e4931004 ldr r1, [r3], #4
20: e1530002 cmp r3, r2
24: 1afffffc bne 1c <test+0x1c>
28: e8bd4010 pop {r4, lr}
2c: eafffffe b 0 <ReleaseHardwareArea>
也许您可以测试一下您的 GCC 是否正确编译了后一个版本?如果没有,那就说明遇到了编译器错误(假设 SIZE
至少为 4),该错误可能(甚至很可能)已在更高版本中修复。
我的源文件中有以下代码:
/* Question's fragment: allocate an area, read it word by word, release it. */
void *hardware = AllocateHardwareArea(SIZE);
/* The pointee is volatile-qualified; the cast itself names plain uint32_t. */
volatile uint32_t *reader = (uint32_t *) hardware;
unsigned x;
/* NOTE(review): u32 is not declared in this fragment — presumably the same width as uint32_t; confirm. */
for (x = 0; x < SIZE / sizeof(u32); ++x)
(void) *reader++; /* read through the volatile pointer, discard the value, advance */
ReleaseHardwareArea(hardware);
但是当我使用 -O3 在面向 ARMv6 的 GCC 4.9.2 上编译它时,编译器正在从汇编语言输出中删除整个 for
循环:
STMFD SP!, {R3,LR}
MOV R0, #0
MOV R1, #0x10000
BL AllocateHardwareArea
LDMFD SP!, {R3,LR}
B ReleaseHardwareArea
难道 volatile
不正是用于这种硬件寄存器的情况吗?
我无法使用 GCC-4.9.3 重现您的结果(gcc-arm-none-eabi-4.9.3.2015q2-1trusty1
来自 Terry Guo 的 PPA,适用于 x86_64 上的 Ubuntu 14.04.2 LTS)。从下面的 file.c
开始:
/* Allocation and release routines are defined elsewhere; only prototypes are given. */
void *AllocateHardwareArea(const unsigned int);
void ReleaseHardwareArea(void *);
/*
 * test - allocate an area, read every unsigned int in it, then release it.
 * size: value passed to AllocateHardwareArea(); also determines the read count.
 */
void test(const unsigned int size)
{
void *hardware = AllocateHardwareArea(size);
/* Pointee is volatile-qualified, so the reads below must not be optimized away. */
volatile unsigned int *reader = hardware;
unsigned int x;
/* Iterates size / sizeof *reader times, i.e. once per whole unsigned int. */
for (x = 0; x < size / sizeof *reader; x++)
(void)*reader++; /* read and discard the value, then advance the pointer */
ReleaseHardwareArea(hardware);
}
使用 arm-none-eabi-gcc-4.9.3 -march=armv6 -mtune=arm6 -O3 -S file.c
编译得到以下汇编代码:
@ Compiler-generated listing for test() from file.c (ARMv6, GNU as syntax).
.arch armv6
.fpu softvfp
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 1
.eabi_attribute 30, 2
.eabi_attribute 34, 1
.eabi_attribute 18, 4
.file "file.c"
.text
.align 2
.global test
.type test, %function
test:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
stmfd sp!, {r4, lr}          @ save r4 and the return address
mov r4, r0                   @ r4 = size, kept across the call
bl AllocateHardwareArea      @ r0 = hardware
movs r2, r4, lsr #2          @ r2 = size >> 2 (word count); sets Z when it is zero
beq .L2                      @ zero words: skip the read loop
mov r3, r0                   @ r3 = reader
add r2, r0, r2, asl #2       @ r2 = hardware + count * 4 (end pointer)
.L3:
ldr r1, [r3]                 @ the volatile read; the loaded value is never used
add r3, r3, #4               @ reader++
cmp r3, r2
bne .L3                      @ repeat until reader reaches the end pointer
.L2:
ldmfd sp!, {r4, lr}          @ restore r4 and lr
b ReleaseHardwareArea        @ tail call; r0 still holds hardware
.size test, .-test
或者,使用arm-none-eabi-gcc-4.9.3 -march=armv6 -mtune=arm6 -O3 -c file.c
编译为目标代码,使用arm-none-eabi-objdump -d file.o
反汇编为
file.o: file format elf32-littlearm
Disassembly of section .text:
00000000 <test>:
0: e92d4010 push {r4, lr}
4: e1a04000 mov r4, r0
8: ebfffffe bl 0 <AllocateHardwareArea>
c: e1b02124 lsrs r2, r4, #2
10: 0a000005 beq 2c <test+0x2c>
14: e1a03000 mov r3, r0
18: e0802102 add r2, r0, r2, lsl #2
1c: e5931000 ldr r1, [r3]
20: e2833004 add r3, r3, #4
24: e1530002 cmp r3, r2
28: 1afffffb bne 1c <test+0x1c>
2c: e8bd4010 pop {r4, lr}
30: eafffffe b 0 <ReleaseHardwareArea>
分配的区域按 unsigned int
大小的单位被逐个读取,这正是预期的行为。在汇编源代码中,读取循环位于标签 .L3
和 .L2
之间。在目标代码中,读取循环位于地址 1c
..28
。
编辑添加:Olaf 在评论中指出,OP 可能使用常量 size
。让我们也检查一下这个案例:
/* Allocation and release routines are defined elsewhere; only prototypes are given. */
void *AllocateHardwareArea(const unsigned int);
void ReleaseHardwareArea(void *);
/* Compile-time constant passed to the allocator; also fixes the loop trip count. */
#define SIZE 32
/* test - same read loop as before, but with a constant area size. */
void test(void)
{
void *hardware = AllocateHardwareArea(SIZE);
/* Pointee is volatile-qualified, so the reads below must not be optimized away. */
volatile unsigned int *reader = hardware;
unsigned int x;
/* Trip count SIZE / sizeof *reader is a constant expression here. */
for (x = 0; x < SIZE / sizeof *reader; x++)
(void)*reader++; /* read and discard the value, then advance the pointer */
ReleaseHardwareArea(hardware);
}
对应的汇编代码是
@ Compiler-generated listing for test() from file2.c (SIZE = 32): the loop is fully unrolled.
.arch armv6
.fpu softvfp
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 1
.eabi_attribute 30, 2
.eabi_attribute 34, 1
.eabi_attribute 18, 4
.file "file2.c"
.text
.align 2
.global test
.type test, %function
test:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
stmfd sp!, {r3, lr}          @ save lr; r3 is pushed too — presumably only for stack alignment (TODO confirm)
mov r0, #32                  @ r0 = SIZE
bl AllocateHardwareArea      @ r0 = hardware
mov r3, r0                   @ r3 = copy of hardware
ldr r2, [r0]                 @ eight word reads follow, at byte offsets 0..28
ldr r2, [r0, #4]
ldr r2, [r0, #8]
ldr r2, [r0, #12]
ldr r2, [r0, #16]
ldr r2, [r0, #20]
ldr r2, [r0, #24]
ldr r3, [r3, #28]            @ last read goes through the r3 copy
ldmfd sp!, {r3, lr}          @ restore lr (and reload r3)
b ReleaseHardwareArea        @ tail call; r0 still holds hardware
.size test, .-test
.ident "GCC: (GNU Tools for ARM Embedded Processors) 4.9.3 20150529 (release) [ARM/embedded-4_9-branch revision 224288]"
和目标代码的反汇编
00000000 <test>:
0: e92d4008 push {r3, lr}
4: e3a00020 mov r0, #32
8: ebfffffe bl 0 <AllocateHardwareArea>
c: e1a03000 mov r3, r0
10: e5902000 ldr r2, [r0]
14: e5902004 ldr r2, [r0, #4]
18: e5902008 ldr r2, [r0, #8]
1c: e590200c ldr r2, [r0, #12]
20: e5902010 ldr r2, [r0, #16]
24: e5902014 ldr r2, [r0, #20]
28: e5902018 ldr r2, [r0, #24]
2c: e593301c ldr r3, [r3, #28]
30: e8bd4008 pop {r3, lr}
34: eafffffe b 0 <ReleaseHardwareArea>
也就是说,循环只是被展开了。当然,如果 SIZE
小于 4,循环次数为零,整个循环就会被优化掉。当 SIZE <= 71
时循环会被展开。对于 SIZE = 72
,目标代码是
00000000 <test>:
0: e92d4008 push {r3, lr}
4: e3a00048 mov r0, #72 ; 0x48
8: ebfffffe bl 0 <AllocateHardwareArea>
c: e1a03000 mov r3, r0
10: e2802048 add r2, r0, #72 ; 0x48
14: e5931000 ldr r1, [r3]
18: e2833004 add r3, r3, #4
1c: e1530002 cmp r3, r2
20: 1afffffb bne 14 <test+0x14>
24: e8bd4008 pop {r3, lr}
28: eafffffe b 0 <ReleaseHardwareArea>
由于您使用的是激进的优化级别 (-O3
) 进行编译,我建议重写您的代码片段,尽量多地加上 const
,而不要假定编译器会自动识别出常量。例如,使用与上面相同的命令,以下版本
/* Allocation and release routines are defined elsewhere; only prototypes are given. */
void *AllocateHardwareArea(const unsigned int);
void ReleaseHardwareArea(void *);
/*
 * test - const-qualified variant: allocate an area, read every unsigned int
 * in it by index, then release it.
 * size: value passed to AllocateHardwareArea(); also determines the read count.
 */
void test(const unsigned int size)
{
void *const hardware = AllocateHardwareArea(size);
/* The pointer itself is const; the pointee stays volatile-qualified. */
volatile unsigned int *const reader = hardware;
/* Element count is computed once, before the loop. */
const unsigned int n = size / sizeof *reader;
unsigned int i;
for (i = 0; i < n; i++)
reader[i]; /* expression statement whose only effect is the volatile read */
ReleaseHardwareArea(hardware);
}
执行完全相同的任务,但内层循环中少了一条指令。汇编代码是
@ Compiler-generated listing for test() from new.c: load and pointer advance share one instruction.
.arch armv6
.fpu softvfp
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 1
.eabi_attribute 30, 2
.eabi_attribute 34, 1
.eabi_attribute 18, 4
.file "new.c"
.text
.align 2
.global test
.type test, %function
test:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
stmfd sp!, {r4, lr}          @ save r4 and the return address
mov r4, r0                   @ r4 = size, kept across the call
bl AllocateHardwareArea      @ r0 = hardware
movs r2, r4, lsr #2          @ r2 = n = size >> 2; sets Z when it is zero
beq .L2                      @ n == 0: skip the read loop
mov r3, r0                   @ r3 = running read pointer
add r2, r0, r2, asl #2       @ r2 = hardware + n * 4 (end pointer)
.L3:
ldr r1, [r3], #4             @ volatile read with post-increment: load, then r3 += 4
cmp r3, r2
bne .L3                      @ repeat until the pointer reaches the end
.L2:
ldmfd sp!, {r4, lr}          @ restore r4 and lr
b ReleaseHardwareArea        @ tail call; r0 still holds hardware
.size test, .-test
.ident "GCC: (GNU Tools for ARM Embedded Processors) 4.9.3 20150529 (release) [ARM/embedded-4_9-branch revision 224288]"
和目标代码
Disassembly of section .text:
00000000 <test>:
0: e92d4010 push {r4, lr}
4: e1a04000 mov r4, r0
8: ebfffffe bl 0 <AllocateHardwareArea>
c: e1b02124 lsrs r2, r4, #2
10: 0a000004 beq 28 <test+0x28>
14: e1a03000 mov r3, r0
18: e0802102 add r2, r0, r2, lsl #2
1c: e4931004 ldr r1, [r3], #4
20: e1530002 cmp r3, r2
24: 1afffffc bne 1c <test+0x1c>
28: e8bd4010 pop {r4, lr}
2c: eafffffe b 0 <ReleaseHardwareArea>
也许您可以测试一下您的 GCC 是否正确编译了后一个版本?如果没有,那就说明遇到了编译器错误(假设 SIZE
至少为 4),该错误可能(甚至很可能)已在更高版本中修复。