I am running the LLVM optimizer (opt) on a piece of LLVM IR that I am generating. After running the optimizer, memory accesses are translated from 32-bit addressing to 64-bit addressing (the getelementptr indices become i64). I'd like to avoid that, because the tool I am using for software verification has problems with 64-bit pointers.
This is the original code:
target triple = "i386-unknown-linux-gnu"
@Global_0 = local_unnamed_addr global i32 0
@Global_1 = local_unnamed_addr global i32 0
@Global_2 = local_unnamed_addr global i32 0
@Global_3 = local_unnamed_addr global i32 0
@mem = local_unnamed_addr global [128 x i8] zeroinitializer, align 1
define i32 @main() #0 {
%local_0 = alloca i32
store i32 0, i32* %local_0
%local_1 = alloca i32
store i32 0, i32* %local_1
%local_2 = alloca i32
store i32 0, i32* %local_2
br label %box_0
%s_0 = load i32, i32* @Global_2, !Stack !{ !"Stack((s_0, W32Int()))" }
br label %box_1
store i32 %s_0, i32* %local_1, !Stack !{ !"Stack()" }
br label %box_2
%s_1 = load i32, i32* @Global_2, !Stack !{ !"Stack((s_1, W32Int()))" }
br label %box_3
%s_2 = add i32 0, 48, !Stack !{ !"Stack((s_2, W32Int()), (s_1, W32Int()))" }
br label %box_4
%s_3 = add i32 %s_1, %s_2, !Stack !{ !"Stack((s_3, W32Int()))" }
br label %box_5
store i32 %s_3, i32* @Global_2, !Stack !{ !"Stack()" }
br label %box_6
%s_4 = load i32, i32* %local_1, !Stack !{ !"Stack((s_4, W32Int()))" }
br label %box_7
store i32 %s_4, i32* %local_2, !Stack !{ !"Stack()" }
br label %loop_8
br label %box_9
%s_5 = load i32, i32* %local_2, !Stack !{ !"Stack((s_5, W32Int()))" }
br label %box_10
%s_6 = load i32, i32* %local_0, !Stack !{ !"Stack((s_6, W32Int()), (s_5, W32Int()))" }
br label %box_11
%s_7 = add i32 0, 2, !Stack !{ !"Stack((s_7, W32Int()), (s_6, W32Int()), (s_5, W32Int()))" }
br label %box_12
%s_8 = shl i32 %s_6, %s_7, !Stack !{ !"Stack((s_8, W32Int()), (s_5, W32Int()))" }
br label %box_13
%s_9 = add i32 %s_5, %s_8, !Stack !{ !"Stack((s_9, W32Int()))" }
br label %box_14
%s_10 = load i32, i32* %local_0, !Stack !{ !"Stack((s_10, W32Int()), (s_9, W32Int()))" }
br label %box_15
%temp_0 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i32 0, i32 %s_9, !Stack !{ !"Stack()" }
%temp_1 = bitcast i8* %temp_0 to i32*, !Stack !{ !"Stack()" }
store i32 %s_10, i32* %temp_1, !Stack !{ !"Stack()" }
br label %box_16
%s_11 = load i32, i32* %local_0, !Stack !{ !"Stack((s_11, W32Int()))" }
br label %box_17
%s_12 = add i32 0, 1, !Stack !{ !"Stack((s_12, W32Int()), (s_11, W32Int()))" }
br label %box_18
%s_13 = add i32 %s_11, %s_12, !Stack !{ !"Stack((s_13, W32Int()))" }
br label %box_19
store i32 %s_13, i32* %local_0, !Stack !{ !"Stack()" }
br label %box_20
%s_14 = add i32 0, 5, !Stack !{ !"Stack((s_14, W32Int()), (s_13, W32Int()))" }
br label %box_21
%s_15 = icmp ne i32 %s_13, %s_14, !Stack !{ !"Stack()" }
%s_16 = zext i1 %s_15 to i32
br label %cond.branch_22
%temp_2 = icmp ne i32 %s_16, 0, !Stack !{ !"Stack()" }
br i1 %temp_2, label %loop_8, label %loop_8.end
br label %box_23
%s_17 = load i32, i32* %local_2, !Stack !{ !"Stack((s_17, W32Int()))" }
br label %box_24
%temp_5 = add i32 16, %s_17, !Stack !{ !"Stack((s_18, W32Int()))" }
%temp_3 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i32 0, i32 %temp_5, !Stack !{ !"Stack((s_18, W32Int()))" }
%temp_4 = bitcast i8* %temp_3 to i32*, !Stack !{ !"Stack((s_18, W32Int()))" }
%s_18 = load i32, i32* %temp_4, !Stack !{ !"Stack((s_18, W32Int()))" }
br label %box_25
%s_19 = add i32 0, 4, !Stack !{ !"Stack((s_19, W32Int()), (s_18, W32Int()))" }
br label %box_26
%s_20 = icmp eq i32 %s_18, %s_19, !Stack !{ !"Stack()" }
%s_21 = zext i1 %s_20 to i32
br label %if_27
%temp_6 = icmp ne i32 %s_21, 0, !Stack !{ !"Stack()" }
br i1 %temp_6, label %box_28, label %box_32
%s_22 = load i32, i32* %local_1, !Stack !{ !"Stack((s_22, W32Int()))" }
br label %box_29
store i32 %s_22, i32* @Global_2, !Stack !{ !"Stack()" }
br label %box_30
%s_23 = add i32 0, 0, !Stack !{ !"Stack((s_23, W32Int()))" }
br label %box_31
br label %if_27_cond.end
call void (...) @__VERIFIER_error() #2
br label %if_27_cond.end
br label %box_33
%s_24 = add i32 0, 0, !Stack !{ !"Stack((s_24, W32Int()))" }
br label %func_2_exit
ret i32 %s_24
declare void @abort(i32 )
declare void @__VERIFIER_error(...) #1
attributes #1 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { noreturn }
For example, this snippet from the code above indexes into @mem using i32 indices:
%temp_0 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i32 0, i32 %s_9
I run:
opt -always-inline -O2 -S output.ll > output-optimized.ll
With this version of opt:
$ opt -version
LLVM (http://llvm.org/):
LLVM version 6.0.1
Optimized build.
Default target: x86_64-unknown-linux-gnu
Host CPU: haswell
The final result is:
; ModuleID = 'output.ll'
source_filename = "output.ll"
target triple = "i386-unknown-linux-gnu"
@Global_0 = local_unnamed_addr global i32 0
@Global_1 = local_unnamed_addr global i32 0
@Global_2 = local_unnamed_addr global i32 0
@Global_3 = local_unnamed_addr global i32 0
@mem = local_unnamed_addr global [128 x i8] zeroinitializer, align 1
define i32 @main() local_unnamed_addr {
%s_0 = load i32, i32* @Global_2, align 4, !Stack !0
%0 = sext i32 %s_0 to i64
%temp_0 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i64 0, i64 %0, !Stack !1
%temp_1 = bitcast i8* %temp_0 to i32*, !Stack !1
store i32 0, i32* %temp_1, align 4, !Stack !1
%s_9.1 = add i32 %s_0, 4, !Stack !2
%1 = sext i32 %s_9.1 to i64
%temp_0.1 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i64 0, i64 %1, !Stack !1
%temp_1.1 = bitcast i8* %temp_0.1 to i32*, !Stack !1
store i32 1, i32* %temp_1.1, align 4, !Stack !1
%s_9.2 = add i32 %s_0, 8, !Stack !2
%2 = sext i32 %s_9.2 to i64
%temp_0.2 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i64 0, i64 %2, !Stack !1
%temp_1.2 = bitcast i8* %temp_0.2 to i32*, !Stack !1
store i32 2, i32* %temp_1.2, align 4, !Stack !1
%s_9.3 = add i32 %s_0, 12, !Stack !2
%3 = sext i32 %s_9.3 to i64
%temp_0.3 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i64 0, i64 %3, !Stack !1
%temp_1.3 = bitcast i8* %temp_0.3 to i32*, !Stack !1
store i32 3, i32* %temp_1.3, align 4, !Stack !1
%s_9.4 = add i32 %s_0, 16
%4 = sext i32 %s_9.4 to i64
%temp_0.4 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i64 0, i64 %4
%temp_1.4 = bitcast i8* %temp_0.4 to i32*
store i32 4, i32* %temp_1.4, align 4, !Stack !1
store i32 %s_0, i32* @Global_2, align 4, !Stack !1
ret i32 0
!0 = !{!"Stack((s_0, W32Int()))"}
!1 = !{!"Stack()"}
!2 = !{!"Stack((s_9, W32Int()))"}
As you can see, the accesses to @mem are now indexed with i64 values (each i32 index is first sign-extended to i64):
%temp_0 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i64 0, i64 %0
What I need is for all of those getelementptr instructions in the output to keep using i32 indices instead of i64. Any ideas?
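As @arrowd pointed out in the question comments, adding a target datalayout line to the module solves the issue. Without one, opt (LLVM 6.0.1 here) falls back to LLVM's default data layout, in which pointers are 64 bits wide, so getelementptr indices are canonicalized to i64 even though the target triple is i386. The LLVM Language Reference describes the pointer specification, p[n]:<size>:<abi>:<pref>:<idx>, as follows: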
This specifies the size of a pointer and its <abi> and <pref>erred
alignments for address space n. The fourth parameter <idx> is a size of
index that used for address calculation. If not specified, the default index
size is equal to the pointer size. All sizes are in bits. The address space,
n, is optional, and if not specified, denotes the default address space 0.
The value of n must be in the range [1,2^23).
The part of the data layout string that sets the pointer size is therefore the p specification. I added p:32:32, i.e. 32-bit pointers with 32-bit ABI alignment, and left the preferred alignment and the index size at their defaults.
The final line is:
target datalayout = "e-p:32:32-m:e-i64:64-f80:128-n8:16:32:64-S128"
Running the opt command:
opt -always-inline -O2 -S output.ll > output-optimized.ll
Produces the expected result:
; ModuleID = 'output.ll'
source_filename = "output.ll"
target datalayout = "e-p:32:32-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "i386-unknown-linux-gnu"
@Global_0 = local_unnamed_addr global i32 0
@Global_1 = local_unnamed_addr global i32 0
@Global_2 = local_unnamed_addr global i32 0
@Global_3 = local_unnamed_addr global i32 0
@mem = local_unnamed_addr global [128 x i8] zeroinitializer, align 1
define i32 @main() local_unnamed_addr {
%s_0 = load i32, i32* @Global_2, align 4, !Stack !0
%temp_0 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i32 0, i32 %s_0, !Stack !1
%temp_1 = bitcast i8* %temp_0 to i32*, !Stack !1
store i32 0, i32* %temp_1, align 4, !Stack !1
%s_9.1 = add i32 %s_0, 4, !Stack !2
%temp_0.1 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i32 0, i32 %s_9.1, !Stack !1
%temp_1.1 = bitcast i8* %temp_0.1 to i32*, !Stack !1
store i32 1, i32* %temp_1.1, align 4, !Stack !1
%s_9.2 = add i32 %s_0, 8, !Stack !2
%temp_0.2 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i32 0, i32 %s_9.2, !Stack !1
%temp_1.2 = bitcast i8* %temp_0.2 to i32*, !Stack !1
store i32 2, i32* %temp_1.2, align 4, !Stack !1
%s_9.3 = add i32 %s_0, 12, !Stack !2
%temp_0.3 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i32 0, i32 %s_9.3, !Stack !1
%temp_1.3 = bitcast i8* %temp_0.3 to i32*, !Stack !1
store i32 3, i32* %temp_1.3, align 4, !Stack !1
%s_9.4 = add i32 %s_0, 16
%temp_0.4 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i32 0, i32 %s_9.4
%temp_1.4 = bitcast i8* %temp_0.4 to i32*
store i32 4, i32* %temp_1.4, align 4, !Stack !1
store i32 %s_0, i32* @Global_2, align 4, !Stack !1
ret i32 0