解释汇编函数行为的建议

Advice for interpreting an assembly function's behavior

我对汇编比较陌生,遇到了一个让我难过的函数。在调试过程中,我发现 $rdi 寄存器最初设置为 0x55555575a1b8。本质上,我正在寻找 return 任何值,但负值或零。最初,似乎将 $esi 设置为 0x5575a1b8 会 return 0x1。实际上它 returned -1。进一步检查发现有几个两个递归函数调用,每个调用都将 return 值保存到寄存器中以供以后比较:

r12 = Find(*(rdi + 0x8), rsi);
rax = Find(*(rdi + 0x10), rsi);

在此之后,我迷失在代码中...

0000000000001b62 <Find>:
1b62:   48 85 ff                test   %rdi,%rdi
1b65:   74 3c                   je     1ba3 <Find+0x41>
1b67:   39 37                   cmp    %esi,(%rdi)
1b69:   74 3e                   je     1ba9 <Find+0x47>
1b6b:   41 54                   push   %r12
1b6d:   55                      push   %rbp
1b6e:   53                      push   %rbx
1b6f:   89 f5                   mov    %esi,%ebp
1b71:   48 89 fb                mov    %rdi,%rbx
1b74:   48 8b 7f 08             mov    0x8(%rdi),%rdi
1b78:   e8 e5 ff ff ff          callq  1b62 <Find>
1b7d:   41 89 c4                mov    %eax,%r12d
1b80:   48 8b 7b 10             mov    0x10(%rbx),%rdi
1b84:   89 ee                   mov    %ebp,%esi
1b86:   e8 d7 ff ff ff          callq  1b62 <Find>
1b8b:   45 85 e4                test   %r12d,%r12d
1b8e:   7e 09                   jle    1b99 <Find+0x37>
1b90:   43 8d 04 24             lea    (%r12,%r12,1),%eax
1b94:   5b                      pop    %rbx
1b95:   5d                      pop    %rbp
1b96:   41 5c                   pop    %r12
1b98:   c3                      retq   
1b99:   85 c0                   test   %eax,%eax
1b9b:   7e 12                   jle    1baf <Find+0x4d>
1b9d:   8d 44 00 01             lea    0x1(%rax,%rax,1),%eax
1ba1:   eb f1                   jmp    1b94 <Find+0x32>
1ba3:   b8 ff ff ff ff          mov    [=11=]xffffffff,%eax
1ba8:   c3                      retq   
1ba9:   b8 01 00 00 00          mov    [=11=]x1,%eax
1bae:   c3                      retq   
1baf:   b8 ff ff ff ff          mov    [=11=]xffffffff,%eax
1bb4:   eb de                   jmp    1b94 <Find+0x32>

非常感谢任何对该方法行为的洞察。

0000000000001b62 <Find>:
; if (null == rdi) return -1;
1b62:   48 85 ff                test   %rdi,%rdi
1b65:   74 3c                   je     1ba3 <Find+0x41>
; if (esi == *rdi) return 1;
1b67:   39 37                   cmp    %esi,(%rdi)
1b69:   74 3e                   je     1ba9 <Find+0x47>
; preserving r12, rbp, rbx
1b6b:   41 54                   push   %r12
1b6d:   55                      push   %rbp
1b6e:   53                      push   %rbx
; r12d = Find( *(rdi+8), esi );
1b6f:   89 f5                   mov    %esi,%ebp
1b71:   48 89 fb                mov    %rdi,%rbx
1b74:   48 8b 7f 08             mov    0x8(%rdi),%rdi
1b78:   e8 e5 ff ff ff          callq  1b62 <Find>
1b7d:   41 89 c4                mov    %eax,%r12d
; eax = Find( *(rdi_orig+16), esi_orig)
1b80:   48 8b 7b 10             mov    0x10(%rbx),%rdi
1b84:   89 ee                   mov    %ebp,%esi
1b86:   e8 d7 ff ff ff          callq  1b62 <Find>
; if (r12d <= 0 && eax <= 0) return -1; (with restoring regs)
; if (r12d <= 0 && 0 < eax) return 2*eax + 1; (with restoring regs)
1b8b:   45 85 e4                test   %r12d,%r12d
1b8e:   7e 09                   jle    1b99 <Find+0x37>
; return 2*r12d; with restoring regs
1b90:   43 8d 04 24             lea    (%r12,%r12,1),%eax
1b94:   5b                      pop    %rbx
1b95:   5d                      pop    %rbp
1b96:   41 5c                   pop    %r12
1b98:   c3                      retq   
1b99:   85 c0                   test   %eax,%eax
1b9b:   7e 12                   jle    1baf <Find+0x4d>
1b9d:   8d 44 00 01             lea    0x1(%rax,%rax,1),%eax
1ba1:   eb f1                   jmp    1b94 <Find+0x32>
1ba3:   b8 ff ff ff ff          mov    [=10=]xffffffff,%eax
1ba8:   c3                      retq   
1ba9:   b8 01 00 00 00          mov    [=10=]x1,%eax
1bae:   c3                      retq   
1baf:   b8 ff ff ff ff          mov    [=10=]xffffffff,%eax
1bb4:   eb de                   jmp    1b94 <Find+0x32>

所以我认为在类似 C 的代码中(没有验证或调试,甚至只是编译):

struct node_s {
    int32_t value;
    int32_t _padding; // left starts at offset 8
    node_s* left;
    node_s* right;
};

int32_t Find(node_s* node, int32_t i) {
    if (nullptr == node) return -1;
    if (i == node->value) return 1;
    int32_t f1 = Find( node->left, i);
    int32_t f2 = Find( node->right, i);
    if (0 < f1) return 2 * f1;
    if (0 < f2) return 2 * f2 + 1;
    return -1;
}

在我看来,它是 return 二叉树中特定值的某种位置的未优化方式。

假设你有这样的树:

n0: {10, n1, n2}
n1: {11, n3, n4}
n2: {12, n5, n6}
n3: {13, nullptr, n7}
n4: {14, nullptr, nullptr}
n5: {15, nullptr, n8}
n6: {16, nullptr, nullptr}
n7: {17, nullptr, nullptr}
n8: {18, nullptr, nullptr}

           10
        /      \
     11            12
  /    \        /    \
13      14    15      16
  \             \
   17            18

那我猜:

Find(n0, 99) = -1
Find(n0, 10) = 1
Find(n0, 11) = 2
Find(n0, 12) = 3
Find(n1, 11) = 1
Find(n0, 17) = 12
Find(n1, 17) = 6
Find(n3, 17) = 3
Find(n7, 17) = 1
...

或者当从 n0 节点搜索特定值时,return 值将是(我希望我没有做错):

           1
        /      \
     2             3
  /    \        /    \
 4       6     5       7
  \             \
   12             13

所以它 returning 整个 "floor"(链接深度)作为某个范围 i .. i+f_n-1(f_n = BT 中地板上可能的最大节点数),其中范围的前半部分是左节点,范围的后半部分是右节点。

现在,如果您的机器中有该代码,您可以尝试为输入重新创建这样的树结构,如果我猜对了,请告诉我。:)

编辑:不,没有。那个底部将是 12、14,而不是 12、13,所以这个顺序比 nice left/right split 更棘手......太累了想不出这个名字。当你有从根到值的路径时,看起来像是某种字典顺序(但排序是向后的,就像反转的字符串,或者从值到顶部的路径):

1 : ""
2 : L
3 : R
4 : LL
5 : RL
6 : LR
7 : RR
8 : LLL
9 : RLL
10: LRL
11: RRL
12: LLR
13: RLR
14: LRR
15: RRR
16: LLLL
17: RLLL
18: LRLL
...