如何让 GCC 不删除通过 volatile 指针进行的读取？

Question

我的源文件中有以下代码：

/* Obtain the hardware area; AllocateHardwareArea is defined outside this snippet. */
void *hardware = AllocateHardwareArea(SIZE);
/* Volatile-qualified view of the area, read one uint32_t at a time. */
volatile uint32_t *reader = (uint32_t *) hardware;
unsigned x;
/* NOTE(review): u32 is not declared in this snippet -- presumably it has the
 * same size as uint32_t; confirm. */
for (x = 0; x < SIZE / sizeof(u32); ++x)
    (void) *reader++;   /* read through the volatile pointer, discard the value, advance */
ReleaseHardwareArea(hardware);

但是，当我使用面向 ARMv6 的 GCC 4.9.2 并以 -O3 编译它时，编译器在生成的汇编输出中删除了整个 for 循环：

STMFD   SP!, {R3,LR}
MOV     R0, #0
MOV     R1, #0x10000
BL      AllocateHardwareArea
LDMFD   SP!, {R3,LR}
B       ReleaseHardwareArea

volatile 难道不正是为这种访问硬件寄存器的情况而设计的吗？

Answer 1

我无法使用 GCC-4.9.3 复现您的结果（gcc-arm-none-eabi-4.9.3.2015q2-1trusty1，来自 Terry Guo 的 PPA，运行于 x86_64 上的 Ubuntu 14.04.2 LTS）。

从下面的 file.c 开始：

/* External routines; only their prototypes are visible in this file. */
void *AllocateHardwareArea(const unsigned int);
void  ReleaseHardwareArea(void *);

/*
 * Obtains an area via AllocateHardwareArea(size), reads it once through a
 * volatile pointer in unsigned int-sized units, discarding every value, and
 * then passes the area to ReleaseHardwareArea().
 *
 * size: value passed to AllocateHardwareArea(); the number of reads is
 *       size / sizeof (unsigned int), so a remainder smaller than one
 *       element is not read.
 */
void test(const unsigned int size)
{
    void *hardware = AllocateHardwareArea(size);
    /* volatile: each read below is an access the compiler is expected to keep
     * even though the value is unused. */
    volatile unsigned int *reader = hardware;
    unsigned int x;

    for (x = 0; x < size / sizeof *reader; x++)
        (void)*reader++;    /* read current element, discard it, advance */

    ReleaseHardwareArea(hardware);
}

使用 arm-none-eabi-gcc-4.9.3 -march=armv6 -mtune=arm6 -O3 -S file.c 编译，得到以下汇编代码：

        .arch armv6
        .fpu softvfp
        .eabi_attribute 20, 1
        .eabi_attribute 21, 1
        .eabi_attribute 23, 3
        .eabi_attribute 24, 1
        .eabi_attribute 25, 1
        .eabi_attribute 26, 1
        .eabi_attribute 30, 2
        .eabi_attribute 34, 1
        .eabi_attribute 18, 4
        .file   "file.c"
        .text
        .align  2
        .global test
        .type   test, %function
test:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        stmfd   sp!, {r4, lr}
        mov     r4, r0
        bl      AllocateHardwareArea
        movs    r2, r4, lsr #2
        beq     .L2
        mov     r3, r0
        add     r2, r0, r2, asl #2
.L3:
        ldr     r1, [r3]
        add     r3, r3, #4
        cmp     r3, r2
        bne     .L3
.L2:
        ldmfd   sp!, {r4, lr}
        b       ReleaseHardwareArea
        .size   test, .-test

或者，使用 arm-none-eabi-gcc-4.9.3 -march=armv6 -mtune=arm6 -O3 -c file.c 编译为目标代码，再用 arm-none-eabi-objdump -d file.o 反汇编，得到：

file.o:     file format elf32-littlearm


Disassembly of section .text:

00000000 <test>:
   0:   e92d4010        push    {r4, lr}
   4:   e1a04000        mov     r4, r0
   8:   ebfffffe        bl      0 <AllocateHardwareArea>
   c:   e1b02124        lsrs    r2, r4, #2
  10:   0a000005        beq     2c <test+0x2c>
  14:   e1a03000        mov     r3, r0
  18:   e0802102        add     r2, r0, r2, lsl #2
  1c:   e5931000        ldr     r1, [r3]
  20:   e2833004        add     r3, r3, #4
  24:   e1530002        cmp     r3, r2
  28:   1afffffb        bne     1c <test+0x1c>
  2c:   e8bd4010        pop     {r4, lr}
  30:   eafffffe        b       0 <ReleaseHardwareArea>

正如预期的那样，分配的区域是以 unsigned int 大小为单位读取的。在汇编源代码中，读取循环位于标签 .L3 和 .L2 之间；在目标代码中，读取循环位于地址 1c..28。

编辑补充：Olaf 在评论中指出，OP 使用的 size 可能是编译期常量。我们也来检查一下这种情况：

/* External routines; only their prototypes are visible in this file. */
void *AllocateHardwareArea(const unsigned int);
void  ReleaseHardwareArea(void *);

/* Compile-time constant passed to AllocateHardwareArea() and used to derive
 * the number of reads. */
#define SIZE 32

/*
 * Same read loop as the variable-size variant, but driven by the constant
 * SIZE: obtains an area via AllocateHardwareArea(SIZE), reads it once through
 * a volatile pointer in unsigned int-sized units, discarding every value, and
 * then passes the area to ReleaseHardwareArea().
 */
void test(void)
{
    void *hardware = AllocateHardwareArea(SIZE);
    /* volatile: each read below is an access the compiler is expected to keep
     * even though the value is unused. */
    volatile unsigned int *reader = hardware;
    unsigned int x;

    /* Iteration count is SIZE / sizeof (unsigned int), a constant expression. */
    for (x = 0; x < SIZE / sizeof *reader; x++)
        (void)*reader++;    /* read current element, discard it, advance */

    ReleaseHardwareArea(hardware);
}

生成的汇编代码是

    .arch armv6
    .fpu softvfp
    .eabi_attribute 20, 1
    .eabi_attribute 21, 1
    .eabi_attribute 23, 3
    .eabi_attribute 24, 1
    .eabi_attribute 25, 1
    .eabi_attribute 26, 1
    .eabi_attribute 30, 2
    .eabi_attribute 34, 1
    .eabi_attribute 18, 4
    .file   "file2.c"
    .text
    .align  2
    .global test
    .type   test, %function
test:
    @ args = 0, pretend = 0, frame = 0
    @ frame_needed = 0, uses_anonymous_args = 0
    stmfd   sp!, {r3, lr}
    mov     r0, #32
    bl      AllocateHardwareArea
    mov     r3, r0
    ldr     r2, [r0]
    ldr     r2, [r0, #4]
    ldr     r2, [r0, #8]
    ldr     r2, [r0, #12]
    ldr     r2, [r0, #16]
    ldr     r2, [r0, #20]
    ldr     r2, [r0, #24]
    ldr     r3, [r3, #28]
    ldmfd   sp!, {r3, lr}
    b       ReleaseHardwareArea
    .size   test, .-test
    .ident  "GCC: (GNU Tools for ARM Embedded Processors) 4.9.3 20150529 (release) [ARM/embedded-4_9-branch revision 224288]"

目标代码的反汇编结果是

00000000 <test>:
   0:   e92d4008        push    {r3, lr}
   4:   e3a00020        mov     r0, #32
   8:   ebfffffe        bl      0 <AllocateHardwareArea>
   c:   e1a03000        mov     r3, r0
  10:   e5902000        ldr     r2, [r0]
  14:   e5902004        ldr     r2, [r0, #4]
  18:   e5902008        ldr     r2, [r0, #8]
  1c:   e590200c        ldr     r2, [r0, #12]
  20:   e5902010        ldr     r2, [r0, #16]
  24:   e5902014        ldr     r2, [r0, #20]
  28:   e5902018        ldr     r2, [r0, #24]
  2c:   e593301c        ldr     r3, [r3, #28]
  30:   e8bd4008        pop     {r3, lr}
  34:   eafffffe        b       0 <ReleaseHardwareArea>

也就是说，循环只是被展开了。当然，如果 SIZE 小于 4，那么 SIZE / sizeof *reader 为 0，循环一次也不会执行，因此会被完全优化掉。当 SIZE <= 71 时循环会被展开；而对于 SIZE = 72，目标代码是

00000000 <test>:
   0:   e92d4008        push    {r3, lr}
   4:   e3a00048        mov     r0, #72 ; 0x48
   8:   ebfffffe        bl      0 <AllocateHardwareArea>
   c:   e1a03000        mov     r3, r0
  10:   e2802048        add     r2, r0, #72     ; 0x48
  14:   e5931000        ldr     r1, [r3]
  18:   e2833004        add     r3, r3, #4
  1c:   e1530002        cmp     r3, r2
  20:   1afffffb        bne     14 <test+0x14>
  24:   e8bd4008        pop     {r3, lr}
  28:   eafffffe        b       0 <ReleaseHardwareArea>

由于您使用的是激进的优化级别 (-O3)，我建议重写您的代码片段，尽可能多地加上 const，而不要假定编译器会自动检测出常量。例如，使用与上面相同的命令，以下版本

/* External routines; only their prototypes are visible in this file. */
void *AllocateHardwareArea(const unsigned int);
void  ReleaseHardwareArea(void *);

/*
 * Const-qualified variant of the read loop: obtains an area via
 * AllocateHardwareArea(size), reads elements 0 .. n-1 of it through a
 * volatile pointer (discarding every value), and then passes the area to
 * ReleaseHardwareArea().
 *
 * size: value passed to AllocateHardwareArea(); n = size / sizeof (unsigned int)
 *       elements are read.
 */
void test(const unsigned int size)
{
    /* The pointers themselves never change, so they are declared const;
     * the loop indexes instead of advancing the pointer. */
    void *const hardware = AllocateHardwareArea(size);
    volatile unsigned int *const reader = hardware;
    const unsigned int n = size / sizeof *reader;   /* element count, computed once */
    unsigned int i;

    for (i = 0; i < n; i++)
        reader[i];  /* expression statement whose only effect is the volatile read */

    ReleaseHardwareArea(hardware);
}

完成的是完全相同的任务，但内层循环中少了一条指令。其汇编代码是

        .arch armv6
        .fpu softvfp
        .eabi_attribute 20, 1
        .eabi_attribute 21, 1
        .eabi_attribute 23, 3
        .eabi_attribute 24, 1
        .eabi_attribute 25, 1
        .eabi_attribute 26, 1
        .eabi_attribute 30, 2
        .eabi_attribute 34, 1
        .eabi_attribute 18, 4
        .file   "new.c"
        .text
        .align  2
        .global test
        .type   test, %function
test:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        stmfd   sp!, {r4, lr}
        mov     r4, r0
        bl      AllocateHardwareArea
        movs    r2, r4, lsr #2
        beq     .L2
        mov     r3, r0
        add     r2, r0, r2, asl #2
.L3:
        ldr     r1, [r3], #4
        cmp     r3, r2
        bne     .L3
.L2:
        ldmfd   sp!, {r4, lr}
        b       ReleaseHardwareArea
        .size   test, .-test
        .ident  "GCC: (GNU Tools for ARM Embedded Processors) 4.9.3 20150529 (release) [ARM/embedded-4_9-branch revision 224288]"

和目标代码

Disassembly of section .text:

00000000 <test>:
   0:   e92d4010        push    {r4, lr}
   4:   e1a04000        mov     r4, r0
   8:   ebfffffe        bl      0 <AllocateHardwareArea>
   c:   e1b02124        lsrs    r2, r4, #2
  10:   0a000004        beq     28 <test+0x28>
  14:   e1a03000        mov     r3, r0
  18:   e0802102        add     r2, r0, r2, lsl #2
  1c:   e4931004        ldr     r1, [r3], #4
  20:   e1530002        cmp     r3, r2
  24:   1afffffc        bne     1c <test+0x1c>
  28:   e8bd4010        pop     {r4, lr}
  2c:   eafffffe        b       0 <ReleaseHardwareArea>

也许您可以测试一下您的 GCC 能否正确编译后一个版本？如果不能，那么我们遇到的就是一个编译器错误（假设 SIZE 至少为 4），它可能（甚至很可能）已在更高版本中修复。

如何让 GCC 不删除通过 volatile 指针进行的读取？

How do I get GCC to not delete reads from volatile pointers?

c

gcc

volatile

compiler-optimization