如何将 C 函数映射到 LLVM IR?

How to map a C function to LLVM IR?

我有这样一个需求:我有一个 C 文件,并为它生成了 LLVM IR。我根据生成的 LLVM IR 中的每条指令,计算执行所需的周期数。现在的问题是:如何把这些结果追溯回 C 代码,从而显示某个特定的 C 代码块(比如一个函数)总共需要多少个周期(这些周期数实际上是我根据生成的 LLVM IR 代码计算出来的)?

我的c代码如下:

int arithmeticOperations(int x, int y) /* Returns aa * ab * ac * ad; each path assigns only two of those four locals. */
{
    int aa, ab, ac, ad; /* all four are declared without initializers */
    if(x>10)
    {
        aa = x+y;
        ab = x-y;
        for(x = 1; x <= aa; ++x) /* reuses parameter x as the loop counter */
        {
            y += x; /* y is not read again after this loop */
        }
    }
    else
    {
        ac = x*y;
        ad = x/y;       /* integer division; y == 0 is not checked */
    }
    return aa * ab * ac * ad; /* NOTE(review): two of these operands are never assigned on either path (indeterminate read, undefined behavior) */
}

void arithmeticOperationsPart2(int x, int y) /* Same if/else arithmetic as arithmeticOperations, without the loop or a return value. */
{
    int aa, ab, ac, ad; /* assigned below but never read */
    if(x>10)
    {
        aa = x+y;
        ab = x-y;
    }
    else
    {
        ac = x*y;
        ad = x/y;       /* integer division; y == 0 is not checked */
    }
} /* nothing is returned or stored outside the function */

int main() /* Calls both functions once with x = 35, y = 7. */
{
    arithmeticOperations(35, 7); /* return value is discarded */
    arithmeticOperationsPart2(35, 7);
} /* no explicit return statement */

我使用以下命令生成 LLVM IR:

clang -Os -S -emit-llvm addition.c

输出的 addition.ll 文件如下:

; ModuleID = 'addition.c'
source_filename = "addition.c"
target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-windows-msvc18.0.0"

; Function Attrs: norecurse nounwind optsize readnone uwtable
define i32 @arithmeticOperations(i32, i32) local_unnamed_addr #0 {
  %3 = icmp sgt i32 %0, 10
  br i1 %3, label %4, label %7

; <label>:4:                                      ; preds = %2
  %5 = add nsw i32 %1, %0
  %6 = sub nsw i32 %0, %1
  br label %10

; <label>:7:                                      ; preds = %2
  %8 = mul nsw i32 %1, %0
  %9 = sdiv i32 %0, %1
  br label %10

; <label>:10:                                     ; preds = %4, %7
  %11 = phi i32 [ undef, %7 ], [ %5, %4 ]
  %12 = phi i32 [ undef, %7 ], [ %6, %4 ]
  %13 = phi i32 [ %8, %7 ], [ undef, %4 ]
  %14 = phi i32 [ %9, %7 ], [ undef, %4 ]
  %15 = mul nsw i32 %12, %11
  %16 = mul nsw i32 %15, %13
  %17 = mul nsw i32 %16, %14
  ret i32 %17
}

; Function Attrs: norecurse nounwind optsize readnone uwtable
define void @arithmeticOperationsPart2(i32, i32) local_unnamed_addr #0 {
  ret void
}

; Function Attrs: norecurse nounwind optsize readnone uwtable
define i32 @main() local_unnamed_addr #0 {
  ret i32 0
}

attributes #0 = { norecurse nounwind optsize readnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"PIC Level", i32 2}
!1 = !{!"clang version 5.0.0 (trunk 302984) (llvm/trunk 302983)"}

现在我想筛选出某段 C 源码(具体来说是某个函数)所对应生成的 LLVM 代码。

例如,目前我想筛选出 C 函数 arithmeticOperations 对应的代码:

  %3 = icmp sgt i32 %0, 10
  br i1 %3, label %4, label %7

; <label>:4:                                      ; preds = %2
  %5 = add nsw i32 %1, %0
  %6 = sub nsw i32 %0, %1
  br label %10

; <label>:7:                                      ; preds = %2
  %8 = mul nsw i32 %1, %0
  %9 = sdiv i32 %0, %1
  br label %10

; <label>:10:                                     ; preds = %4, %7
  %11 = phi i32 [ undef, %7 ], [ %5, %4 ]
  %12 = phi i32 [ undef, %7 ], [ %6, %4 ]
  %13 = phi i32 [ %8, %7 ], [ undef, %4 ]
  %14 = phi i32 [ %9, %7 ], [ undef, %4 ]
  %15 = mul nsw i32 %12, %11
  %16 = mul nsw i32 %15, %13
  %17 = mul nsw i32 %16, %14
  ret i32 %17

对应c代码的以下部分:

int aa, ab, ac, ad;
    if(x>10)
    {
        aa = x+y;
        ab = x-y;
        for(x = 1; x <= aa; ++x)
        {
            y += x;
        }
    }
    else
    {
        ac = x*y;
        ad = x/y;       
    }
    return aa * ab * ac * ad;

您可以通过添加 -g 标志告诉 clang 发出调试信息:

clang -Os -S -emit-llvm -g addition.c

然后你会在 .ll 文件中发现大量信息,说明每条指令对应于原始源码的哪一行。

例如 arithmeticOperations 函数的开头翻译如下,以 !dbg !<number> 结尾的行指的是调试信息条目:

; Function Attrs: nounwind optsize readnone uwtable
define i32 @arithmeticOperations(i32 %x, i32 %y) local_unnamed_addr #0 !dbg !7 {
entry:
  tail call void @llvm.dbg.value(metadata i32 %y, i64 0, metadata !12, metadata !18), !dbg !19
  tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !13, metadata !18), !dbg !20
  %cmp = icmp sgt i32 %x, 10, !dbg !21
  br i1 %cmp, label %if.then, label %if.else, !dbg !23

在文件末尾会有许多 "DILocation" 条目告诉您相应的源代码在哪里:

...
!19 = !DILocation(line: 1, column: 37, scope: !7)
!20 = !DILocation(line: 1, column: 30, scope: !7)
!21 = !DILocation(line: 4, column: 9, scope: !22)
!22 = distinct !DILexicalBlock(scope: !7, file: !1, line: 4, column: 8)
!23 = !DILocation(line: 4, column: 8, scope: !7)

因此,如果您想知道下面这一行指令的来源:

%cmp = icmp sgt i32 %x, 10, !dbg !21

您必须查找调试条目 !21:

!21 = !DILocation(line: 4, column: 9, scope: !22)

事实上,第 4 行第 9 列正是 if 条件中 ">" 运算符所在的位置:

4:    if(x>10)

Clang 的调试信息非常精确,甚至指向“>”运算符。