clang 以错误的方式优化了代码

clang optimized code in wrong way

部分代码如下:

// Atom types that f() compares atom->hdr->type against in its lookup loop;
// a match sends f() to its `found:` label. The table holds exactly five
// entries: moov, trak, mdia, minf and stbl.
// NOTE(review): judging by the name these are presumably the atom types that
// contain nested atoms — confirm against the rest of the parser.
static uint32_t mp4_atom_containers[] = {
    ATOM('m', 'o', 'o', 'v'), 
    ATOM('t', 'r', 'a', 'k'), 
    ATOM('m', 'd', 'i', 'a'),
    ATOM('m', 'i', 'n', 'f'),
    ATOM('s', 't', 'b', 'l')
};

// Packed atom header: two 32-bit fields followed by a zero-length trailing
// array. The two fields give an 8-byte header, which matches the
// `add eax, 0xfffffff8` (i.e. -8) in the disassembly of f().
typedef struct {
    // Read through be32toh() in f(), so it is stored big-endian.
    uint32_t size;
    // Compared directly against ATOM(...) constants in f().
    uint32_t type;
    // Zero-length array (compiler extension); adds nothing to sizeof.
    // NOTE(review): presumably marks where the atom's payload begins — confirm.
    u_char data[0];
} __packed mp4_atom_hdr_t;

// Inspects one atom: logs its type, records trak/stsz/stsc/mvhd atoms on
// mp4f, then looks the type up in mp4_atom_containers and continues at
// `found:` only when it is listed there.
//
// mp4f  parser state; its log, trak, stsz, stsc and mvhd members are used.
// atom  atom to inspect; atom->hdr supplies the type and size fields.
//
// Returns 0 on every path visible in this excerpt.
static ngx_int_t f(mp4_file_t *mp4f, mp4_atom_t *atom)
{
    ngx_uint_t i;
    uint32_t atom_size;

    ngx_log_debug1(NGX_LOG_DEBUG_HTTP, mp4f->log, 0, "f:: type: %i", atom->hdr->type);

    // Remember atoms of interest. trak stores the atom wrapper itself; the
    // other three store the raw header pointer cast to a type-specific struct.
    if (atom->hdr->type == ATOM('t', 'r', 'a', 'k'))
        mp4f->trak = atom;
    else if (atom->hdr->type == ATOM('s', 't', 's', 'z'))
        mp4f->stsz = (mp4_atom_stsz_t *)atom->hdr;
    else if (atom->hdr->type == ATOM('s', 't', 's', 'c'))
        mp4f->stsc = (mp4_atom_stsc_t *)atom->hdr;
    else if (atom->hdr->type == ATOM('m', 'v', 'h', 'd'))
        mp4f->mvhd = (mp4_atom_mvhd_t *)atom->hdr;

    // point #1
    // NOTE(review): sizeof(mp4_atom_containers) is the array's size in bytes,
    // not its element count, so this bound lets i index past the last of the
    // five entries, which is undefined behavior. The element count is
    // sizeof(mp4_atom_containers) / sizeof(mp4_atom_containers[0]).
    for (i = 0; i < sizeof(mp4_atom_containers); i++) {
        if (atom->hdr->type == mp4_atom_containers[i]) 
            goto found;
    }

    // Type is not listed in mp4_atom_containers: nothing more to do.
    return 0;

found:
    // Size field converted from big-endian, minus the header size.
    // NOTE(review): `hdr` is not declared anywhere in this excerpt;
    // presumably sizeof(*atom->hdr) is meant — confirm.
    atom_size = be32toh(atom->hdr->size) - sizeof(*hdr);
    // rest of the code

    return 0;
}

下面是 clang 生成的二进制代码,用 Hopper 反汇编后的结果:

00000001000750c2         mov        rdx, qword [ds:r13+0x28]                    ; XREF=_mp4_parse_atom+35
00000001000750c6         mov        eax, dword [ds:rdx+4]
00000001000750c9         cmp        eax, 'srak'
00000001000750ce         jg         0x1000750e0

00000001000750d0         cmp        eax, 'stsc'
00000001000750d5         jne        0x1000750f0

00000001000750d7         mov        qword [ds:r14+0x140], rdx                   ; mp4f->stsc = (mp4_atom_stsc_t *)atom->hdr;
00000001000750de         jmp        0x10007510e

00000001000750e0         cmp        eax, 'stsz'                                 ; XREF=_mp4_parse_atom+78
00000001000750e5         jne        0x100075100

00000001000750e7         mov        qword [ds:r14+0x138], rdx                   ; mp4f->stsz = (mp4_atom_stsz_t *)atom->hdr;
00000001000750ee         jmp        0x10007510e

00000001000750f0         cmp        eax, 'mvhd'                                 ; XREF=_mp4_parse_atom+85
00000001000750f5         jne        0x10007510e

00000001000750f7         mov        qword [ds:r14+0x130], rdx                   ; mp4f->mvhd = (mp4_atom_mvhd_t *)atom->hdr;
00000001000750fe         jmp        0x10007510e

0000000100075100         cmp        eax, 'trak'                                 ; XREF=_mp4_parse_atom+101
0000000100075105         jne        0x10007510e

0000000100075107         mov        qword [ds:r14+0x128], r13                   ; mp4f->trak = atom;

000000010007510e         mov        rbx, r14                                    ; XREF=_mp4_parse_atom+94, _mp4_parse_atom+110, _mp4_parse_atom+117, _mp4_parse_atom+126, _mp4_parse_atom+133
0000000100075111         mov        eax, dword [ds:rdx]
0000000100075113         bswap      eax
0000000100075115         xor        r12d, r12d
0000000100075118         add        eax, 0xfffffff8
000000010007511b         mov        qword [ss:rbp+var_38], rax                 ; atom_size = be32toh(atom->hdr->size) - sizeof(*hdr);
000000010007511f         je         0x10007527f                                ; point #2

0000000100075125         lea        rax, qword [ds:r13+0x18]
0000000100075129         mov        qword [ss:rbp+var_48], rax
000000010007512d         xor        r14d, r14d
0000000100075130         mov        r15, rdx
0000000100075133         jmp        0x100075144
0000000100075135         nop        word [cs:rax+rax]
; other code

出于某种原因,clang 删除了第 #1 点处的检查,或者以错误的方式对其进行了优化。在反汇编代码中,如果类型不是 mp4_atom_containers 中列出的类型之一,本应跳转到点 #2。

for 循环中的 mp4_atom_containers[i] 存在越界访问,会引发未定义行为。因此,编译器可以按这种方式优化掉循环条件。

for (i = 0; i < sizeof(mp4_atom_containers); i++)

应该是

for (i = 0; i < sizeof mp4_atom_containers / sizeof *mp4_atom_containers; i++)

sizeof(mp4_atom_containers) 是整个数组的大小(以字节为单位),但 sizeof mp4_atom_containers / sizeof *mp4_atom_containers 是数组中元素的数量。

编译器可以假定 i 永远不会等于 5,因为那将是越界访问;因此 i 始终小于 sizeof(mp4_atom_containers)(这里很可能是 20),循环条件恒为真。这样一来,循环只能通过 goto found 退出,相当于一个没有正常出口的无限循环,于是编译器把整个循环连同其后的 return 0 一并删除,直接执行 found 处的代码。