Search code examples
swiftperformanceassemblycpu-speed

How to estimate time consumption of my sample code by order of magnitude


I wrote a piece of code to calculate pi using the Monte Carlo method, running on a 2013 MacBook Air with a 1.7 GHz Intel Core i7 (it seems to be the 4650U). When the loop count is 10^8 it takes 2 to 3 seconds, and when the loop count is 10^9 it takes about 25 seconds.

import Foundation

/// Draws one value from the C library `rand()` generator and rescales it
/// linearly onto the interval described by the two bounds.
///
/// - Parameters:
///   - lowerBound: Start of the target interval (the result when `rand()` yields 0).
///   - upperBound: End of the target interval.
/// - Returns: `lowerBound` plus the drawn fraction of `upperBound - lowerBound`.
func randomNumber(lowerBound:Double, upperBound:Double) -> Double {
    // Position of the drawn integer within rand()'s full output range.
    let unitFraction = Double(rand()) / Double(RAND_MAX)
    let span = upperBound - lowerBound
    return lowerBound + unitFraction * span
}

// Monte Carlo estimate of pi: sample points in the square [-1, 1] x [-1, 1]
// and count how many land inside the unit circle. The hit ratio approximates
// (circle area) / (square area) = pi / 4.
let pointNumber = 1000000000
var pointInsideCount = 0
// Half-open range so that exactly `pointNumber` points are sampled; a closed
// range (0...pointNumber) would draw one extra point while the ratio below
// still divides by `pointNumber`. The index itself is never used.
for _ in 0..<pointNumber {
    let x = randomNumber(-1.0, upperBound:1.0)
    let y = randomNumber(-1.0, upperBound:1.0)
    // The point is inside (or on) the unit circle when x^2 + y^2 <= 1.
    if x*x+y*y <= 1 {
        pointInsideCount += 1
    }
}
// Scale the hit ratio by 4 to turn pi/4 into pi.
let result = Double(pointInsideCount) / Double(pointNumber) * 4
// NOTE: a Double holds roughly 15-17 significant decimal digits, so most of
// the 50 printed decimal places carry no information about the estimate.
let piString = String(format: "%.50f", result)
print("Pi is \(piString)")

I ran "di -n randomNumber" to get the assembly code of the randomNumber function

swiftTest`swiftTest.randomNumber (Swift.Double, upperBound : Swift.Double) -> Swift.Double:
    0x10023c160 <+0>:  pushq  %rbp
    0x10023c161 <+1>:  movq   %rsp, %rbp
    0x10023c164 <+4>:  subq   $0x20, %rsp
    0x10023c168 <+8>:  movsd  %xmm0, -0x8(%rbp)
    0x10023c16d <+13>: movsd  %xmm1, -0x10(%rbp)
    0x10023c172 <+18>: movsd  %xmm0, -0x18(%rbp)
    0x10023c177 <+23>: movsd  %xmm1, -0x20(%rbp)
    0x10023c17c <+28>: callq  0x10027585e               ; symbol stub for: rand
    0x10023c181 <+33>: movsd  0x3bc1f(%rip), %xmm0      ; witness table offset for Swift.VaListBuilder.__allocating_init (Swift.VaListBuilder.Type)() -> Swift.VaListBuilder + 352
    0x10023c189 <+41>: cvtsi2sdl %eax, %xmm1
    0x10023c18d <+45>: divsd  %xmm0, %xmm1
    0x10023c191 <+49>: movsd  -0x20(%rbp), %xmm0
    0x10023c196 <+54>: movsd  -0x18(%rbp), %xmm2
    0x10023c19b <+59>: subsd  %xmm2, %xmm0
    0x10023c19f <+63>: mulsd  %xmm0, %xmm1
    0x10023c1a3 <+67>: addsd  %xmm1, %xmm2
    0x10023c1a7 <+71>: movaps %xmm2, %xmm0
    0x10023c1aa <+74>: addq   $0x20, %rsp
    0x10023c1ae <+78>: popq   %rbp
    0x10023c1af <+79>: retq   

And ran "di -f" to get the assembly code of the whole file

swiftTest`main:
    0x10023bcd0 <+0>:    pushq  %rbp
    0x10023bcd1 <+1>:    movq   %rsp, %rbp
    0x10023bcd4 <+4>:    subq   $0x120, %rsp
    0x10023bcdb <+11>:   leaq   0x9340e(%rip), %rax       ; globalinit_33_1BDF70FFC18749BAB495A73B459ED2F0_token6
    0x10023bce2 <+18>:   leaq   0x933ff(%rip), %rcx       ; static Swift.Process._argc : Swift.Int32
    0x10023bce9 <+25>:   movl   %edi, (%rcx)
    0x10023bceb <+27>:   cmpq   $-0x1, (%rax)
    0x10023bcf2 <+34>:   movq   %rsi, -0x60(%rbp)
    0x10023bcf6 <+38>:   je     0x10023bd0e               ; <+62> at main.swift
    0x10023bcf8 <+40>:   leaq   0x933f1(%rip), %rdi       ; globalinit_33_1BDF70FFC18749BAB495A73B459ED2F0_token6
    0x10023bcff <+47>:   leaq   -0x99d56(%rip), %rax      ; globalinit_33_1BDF70FFC18749BAB495A73B459ED2F0_func6
    0x10023bd06 <+54>:   movq   %rax, %rsi
    0x10023bd09 <+57>:   callq  0x100266870               ; swift_once
    0x10023bd0e <+62>:   leaq   0x933e3(%rip), %rax       ; static Swift.Process._unsafeArgv : Swift.UnsafeMutablePointer<Swift.UnsafeMutablePointer<Swift.Int8>>
    0x10023bd15 <+69>:   movq   -0x60(%rbp), %rcx
    0x10023bd19 <+73>:   movq   %rcx, (%rax)
    0x10023bd1c <+76>:   movq   $0x989680, 0x93499(%rip)  ; lazy cache variable for type metadata for Swift.VaListBuilder + 4
    0x10023bd27 <+87>:   movq   $0x0, 0x93496(%rip)       ; swiftTest.pointNumber : Swift.Int + 4
    0x10023bd32 <+98>:   movq   0x93487(%rip), %rax       ; swiftTest.pointNumber : Swift.Int
    0x10023bd39 <+105>:  movq   %rax, -0x68(%rbp)
    0x10023bd3d <+109>:  xorl   %eax, %eax
    0x10023bd3f <+111>:  movl   %eax, %ecx
    0x10023bd41 <+113>:  movq   -0x68(%rbp), %rdx
    0x10023bd45 <+117>:  cmpq   %rdx, %rcx
    0x10023bd48 <+120>:  setle  %sil
    0x10023bd4c <+124>:  testb  $0x1, %sil
    0x10023bd50 <+128>:  jne    0x10023bd54               ; <+132> at main.swift:17
    0x10023bd52 <+130>:  jmp    0x10023bdb3               ; <+227> at main.swift:17
    0x10023bd54 <+132>:  movq   -0x68(%rbp), %rax
    0x10023bd58 <+136>:  incq   %rax
    0x10023bd5b <+139>:  seto   %cl
    0x10023bd5e <+142>:  movq   -0x68(%rbp), %rdx
    0x10023bd62 <+146>:  cmpq   %rdx, %rax
    0x10023bd65 <+149>:  setg   %sil
    0x10023bd69 <+153>:  testb  $0x1, %sil
    0x10023bd6d <+157>:  movb   %cl, -0x69(%rbp)
    0x10023bd70 <+160>:  jne    0x10023bd74               ; <+164> at main.swift:17
    0x10023bd72 <+162>:  jmp    0x10023bd87               ; <+183> at main.swift:17
    0x10023bd74 <+164>:  movq   -0x68(%rbp), %rax
    0x10023bd78 <+168>:  incq   %rax
    0x10023bd7b <+171>:  seto   %cl
    0x10023bd7e <+174>:  movq   %rax, -0x78(%rbp)
    0x10023bd82 <+178>:  movb   %cl, -0x79(%rbp)
    0x10023bd85 <+181>:  jmp    0x10023bddf               ; <+271> at main.swift:17
    0x10023bd87 <+183>:  leaq   0x418a2(%rip), %rdi       ; "fatal error"
    0x10023bd8e <+190>:  movl   $0xb, %eax
    0x10023bd93 <+195>:  movl   %eax, %esi
    0x10023bd95 <+197>:  movl   $0x2, %eax
    0x10023bd9a <+202>:  leaq   0x487af(%rip), %rcx       ; "Range end index has no valid successor"
    0x10023bda1 <+209>:  movl   $0x26, %edx
    0x10023bda6 <+214>:  movl   %edx, %r8d
    0x10023bda9 <+217>:  movl   %eax, %edx
    0x10023bdab <+219>:  movl   %eax, %r9d
    0x10023bdae <+222>:  callq  0x1001a80f0               ; function signature specialization <Arg[0] = Exploded, Arg[1] = Exploded, Arg[2] = Dead, Arg[3] = Dead> of Swift._fatalErrorMessage (Swift.StaticString, Swift.StaticString, Swift.StaticString, Swift.UInt) -> ()
    0x10023bdb3 <+227>:  leaq   0x41876(%rip), %rdi       ; "fatal error"
    0x10023bdba <+234>:  movl   $0xb, %eax
    0x10023bdbf <+239>:  movl   %eax, %esi
    0x10023bdc1 <+241>:  movl   $0x2, %eax
    0x10023bdc6 <+246>:  leaq   0x48753(%rip), %rcx       ; "Can't form Range with end < start"
    0x10023bdcd <+253>:  movl   $0x21, %edx
    0x10023bdd2 <+258>:  movl   %edx, %r8d
    0x10023bdd5 <+261>:  movl   %eax, %edx
    0x10023bdd7 <+263>:  movl   %eax, %r9d
    0x10023bdda <+266>:  callq  0x1001a80f0               ; function signature specialization <Arg[0] = Exploded, Arg[1] = Exploded, Arg[2] = Dead, Arg[3] = Dead> of Swift._fatalErrorMessage (Swift.StaticString, Swift.StaticString, Swift.StaticString, Swift.UInt) -> ()
    0x10023bddf <+271>:  leaq   -0x30(%rbp), %rdi
    0x10023bde3 <+275>:  leaq   -0x20(%rbp), %rsi
    0x10023bde7 <+279>:  movq   $0x0, -0x20(%rbp)
    0x10023bdef <+287>:  movq   -0x78(%rbp), %rax
    0x10023bdf3 <+291>:  movq   %rax, -0x18(%rbp)
    0x10023bdf7 <+295>:  callq  0x1000362e0               ; generic specialization <Swift.Int with Swift.Int : Swift.ForwardIndexType in Swift, Swift.Int with Swift.Int : Swift._SignedIntegerType in Swift, Swift.Int with Swift.Int : Swift._BuiltinIntegerLiteralConvertible in Swift, Swift.Int> of Swift.Range.generate <A where A: Swift.ForwardIndexType> (Swift.Range<A>)() -> Swift.RangeGenerator<A>
    0x10023bdfc <+300>:  movq   -0x30(%rbp), %rax
    0x10023be00 <+304>:  movq   -0x28(%rbp), %rsi
    0x10023be04 <+308>:  movq   %rax, -0x10(%rbp)
    0x10023be08 <+312>:  movq   %rsi, -0x8(%rbp)
    0x10023be0c <+316>:  leaq   -0x40(%rbp), %rdi
    0x10023be10 <+320>:  leaq   -0x10(%rbp), %rsi
    0x10023be14 <+324>:  callq  0x100036960               ; generic specialization <Swift.Int with Swift.Int : Swift.ForwardIndexType in Swift, Swift.Int with Swift.Int : Swift._SignedIntegerType in Swift, Swift.Int with Swift.Int : Swift._BuiltinIntegerLiteralConvertible in Swift, Swift.Int> of Swift.RangeGenerator.next <A where A: Swift.ForwardIndexType> (inout Swift.RangeGenerator<A>)() -> Swift.Optional<A>
    0x10023be19 <+329>:  movq   -0x40(%rbp), %rsi
    0x10023be1d <+333>:  movb   -0x38(%rbp), %al
    0x10023be20 <+336>:  xorb   $0x1, %al
    0x10023be22 <+338>:  testb  $0x1, %al
    0x10023be24 <+340>:  movq   %rsi, -0x88(%rbp)
    0x10023be2b <+347>:  jne    0x10023be32               ; <+354> at main.swift:17
    0x10023be2d <+349>:  jmp    0x10023bed4               ; <+516> at main.swift:23
    0x10023be32 <+354>:  movsd  0x3bf66(%rip), %xmm0      ; witness table offset for Swift.VaListBuilder.__allocating_init (Swift.VaListBuilder.Type)() -> Swift.VaListBuilder + 344
    0x10023be3a <+362>:  movsd  0x3bf56(%rip), %xmm1      ; witness table offset for Swift.VaListBuilder.__allocating_init (Swift.VaListBuilder.Type)() -> Swift.VaListBuilder + 336
    0x10023be42 <+370>:  movq   -0x88(%rbp), %rax
    0x10023be49 <+377>:  movq   %rax, -0x48(%rbp)
    0x10023be4d <+381>:  callq  0x10023c160               ; swiftTest.randomNumber (Swift.Double, upperBound : Swift.Double) -> Swift.Double at main.swift:11
    0x10023be52 <+386>:  movsd  0x3bf46(%rip), %xmm1      ; witness table offset for Swift.VaListBuilder.__allocating_init (Swift.VaListBuilder.Type)() -> Swift.VaListBuilder + 344
    0x10023be5a <+394>:  movsd  0x3bf36(%rip), %xmm2      ; witness table offset for Swift.VaListBuilder.__allocating_init (Swift.VaListBuilder.Type)() -> Swift.VaListBuilder + 336
    0x10023be62 <+402>:  movsd  %xmm0, -0x50(%rbp)
    0x10023be67 <+407>:  movsd  %xmm0, -0x90(%rbp)
    0x10023be6f <+415>:  movaps %xmm1, %xmm0
    0x10023be72 <+418>:  movaps %xmm2, %xmm1
    0x10023be75 <+421>:  callq  0x10023c160               ; swiftTest.randomNumber (Swift.Double, upperBound : Swift.Double) -> Swift.Double at main.swift:11
    0x10023be7a <+426>:  movsd  0x3bf16(%rip), %xmm1      ; witness table offset for Swift.VaListBuilder.__allocating_init (Swift.VaListBuilder.Type)() -> Swift.VaListBuilder + 336
    0x10023be82 <+434>:  movsd  %xmm0, -0x58(%rbp)
    0x10023be87 <+439>:  movsd  -0x90(%rbp), %xmm2
    0x10023be8f <+447>:  mulsd  %xmm2, %xmm2
    0x10023be93 <+451>:  mulsd  %xmm0, %xmm0
    0x10023be97 <+455>:  addsd  %xmm0, %xmm2
    0x10023be9b <+459>:  ucomisd %xmm2, %xmm1
    0x10023be9f <+463>:  jb     0x10023becf               ; <+511> at main.swift:23
    0x10023bea1 <+465>:  movq   0x93320(%rip), %rax       ; swiftTest.pointInsideCount : Swift.Int
    0x10023bea8 <+472>:  incq   %rax
    0x10023beab <+475>:  seto   %cl
    0x10023beae <+478>:  movq   %rax, -0x98(%rbp)
    0x10023beb5 <+485>:  movb   %cl, -0x99(%rbp)
    0x10023bebb <+491>:  jo     0x10023c155               ; <+1157> at main.swift:21
    0x10023bec1 <+497>:  movq   -0x98(%rbp), %rax
    0x10023bec8 <+504>:  movq   %rax, 0x932f9(%rip)       ; swiftTest.pointInsideCount : Swift.Int
    0x10023becf <+511>:  jmp    0x10023be0c               ; <+316> at main.swift:17
    0x10023bed4 <+516>:  movsd  0x3beb4(%rip), %xmm0      ; witness table offset for Swift.VaListBuilder.__allocating_init (Swift.VaListBuilder.Type)() -> Swift.VaListBuilder + 328
    0x10023bedc <+524>:  cvtsi2sdq 0x932e3(%rip), %xmm1      ; swiftTest.pointInsideCount : Swift.Int
    0x10023bee5 <+533>:  cvtsi2sdq 0x932d2(%rip), %xmm2      ; swiftTest.pointNumber : Swift.Int
    0x10023beee <+542>:  divsd  %xmm2, %xmm1
    0x10023bef2 <+546>:  mulsd  %xmm0, %xmm1
    0x10023bef6 <+550>:  movsd  %xmm1, 0x932d2(%rip)      ; swiftTest.result : Swift.Double
    0x10023befe <+558>:  callq  0x10023c1b0               ; type metadata accessor for Swift.CVarArgType
    0x10023bf03 <+563>:  movl   $0x1, %ecx
    0x10023bf08 <+568>:  movl   %ecx, %edi
    0x10023bf0a <+570>:  movq   %rax, %rsi
    0x10023bf0d <+573>:  callq  0x100045770               ; Swift._allocateUninitializedArray <A> (Builtin.Word) -> (Swift.Array<A>, Builtin.RawPointer)
    0x10023bf12 <+578>:  leaq   0x4865e(%rip), %rdi       ; "%.50f"
    0x10023bf19 <+585>:  movl   $0x5, %ecx
    0x10023bf1e <+590>:  movl   %ecx, %esi
    0x10023bf20 <+592>:  movl   $0x1, %ecx
    0x10023bf25 <+597>:  movq   %rdx, -0xa8(%rbp)
    0x10023bf2c <+604>:  movl   %ecx, %edx
    0x10023bf2e <+606>:  movq   %rax, -0xb0(%rbp)
    0x10023bf35 <+613>:  callq  0x100001aa0               ; Swift.String.init (Swift.String.Type)(_builtinStringLiteral : Builtin.RawPointer, byteSize : Builtin.Word, isASCII : Builtin.Int1) -> Swift.String
    0x10023bf3a <+618>:  leaq   0x667b7(%rip), %rsi       ; protocol witness table for Swift.Double : Swift.CVarArgType in Swift
    0x10023bf41 <+625>:  leaq   0x6a258(%rip), %rdi       ; direct type metadata for Swift.Double
    0x10023bf48 <+632>:  addq   $0x8, %rdi
    0x10023bf4f <+639>:  movq   -0xa8(%rbp), %r8
    0x10023bf56 <+646>:  movq   %rdi, 0x18(%r8)
    0x10023bf5a <+650>:  movq   %rsi, 0x20(%r8)
    0x10023bf5e <+654>:  movsd  0x9326a(%rip), %xmm0      ; swiftTest.result : Swift.Double
    0x10023bf66 <+662>:  movsd  %xmm0, (%r8)
    0x10023bf6b <+667>:  movq   %rax, %rdi
    0x10023bf6e <+670>:  movq   %rdx, %rsi
    0x10023bf71 <+673>:  movq   %rcx, %rdx
    0x10023bf74 <+676>:  movq   -0xb0(%rbp), %rcx
    0x10023bf7b <+683>:  callq  0x10002dfa0               ; ext.Foundation.Swift.String.init (Swift.String.Type)(format : Swift.String, Swift.Array<Swift.CVarArgType>...) -> Swift.String
    0x10023bf80 <+688>:  movq   %rax, 0x93251(%rip)       ; swiftTest.piString : Swift.String
    0x10023bf87 <+695>:  movq   %rdx, 0x93252(%rip)       ; swiftTest.piString : Swift.String + 8
    0x10023bf8e <+702>:  movq   %rcx, 0x93253(%rip)       ; swiftTest.piString : Swift.String + 16
->  0x10023bf95 <+709>:  callq  0x10023c200               ; type metadata accessor for protocol<>
    0x10023bf9a <+714>:  movl   $0x1, %r9d
    0x10023bfa0 <+720>:  movl   %r9d, %edi
    0x10023bfa3 <+723>:  movq   %rax, %rsi
    0x10023bfa6 <+726>:  callq  0x100045770               ; Swift._allocateUninitializedArray <A> (Builtin.Word) -> (Swift.Array<A>, Builtin.RawPointer)
    0x10023bfab <+731>:  movl   $0x3, %r9d
    0x10023bfb1 <+737>:  movl   %r9d, %edi
    0x10023bfb4 <+740>:  leaq   0x6fe25(%rip), %rcx       ; direct type metadata for Swift.String
    0x10023bfbb <+747>:  addq   $0x8, %rcx
    0x10023bfc2 <+754>:  movq   %rcx, 0x18(%rdx)
    0x10023bfc6 <+758>:  movq   %rcx, %rsi
    0x10023bfc9 <+761>:  movq   %rax, -0xb8(%rbp)
    0x10023bfd0 <+768>:  movq   %rdx, -0xc0(%rbp)
    0x10023bfd7 <+775>:  callq  0x100045770               ; Swift._allocateUninitializedArray <A> (Builtin.Word) -> (Swift.Array<A>, Builtin.RawPointer)
    0x10023bfdc <+780>:  leaq   0x4859a(%rip), %rdi       ; "Pi is "
    0x10023bfe3 <+787>:  movl   $0x6, %r9d
    0x10023bfe9 <+793>:  movl   %r9d, %esi
    0x10023bfec <+796>:  movl   $0x1, %r9d
    0x10023bff2 <+802>:  movq   %rdx, -0xc8(%rbp)
    0x10023bff9 <+809>:  movl   %r9d, %edx
    0x10023bffc <+812>:  movq   %rax, -0xd0(%rbp)
    0x10023c003 <+819>:  callq  0x100001aa0               ; Swift.String.init (Swift.String.Type)(_builtinStringLiteral : Builtin.RawPointer, byteSize : Builtin.Word, isASCII : Builtin.Int1) -> Swift.String
    0x10023c008 <+824>:  movq   %rax, %rdi
    0x10023c00b <+827>:  movq   %rdx, %rsi
    0x10023c00e <+830>:  movq   %rcx, %rdx
    0x10023c011 <+833>:  callq  0x1000470d0               ; Swift.String.init (Swift.String.Type)(stringInterpolationSegment : Swift.String) -> Swift.String
    0x10023c016 <+838>:  movq   -0xc8(%rbp), %rsi
    0x10023c01d <+845>:  movq   %rax, (%rsi)
    0x10023c020 <+848>:  movq   %rdx, 0x8(%rsi)
    0x10023c024 <+852>:  movq   %rcx, 0x10(%rsi)
    0x10023c028 <+856>:  movq   0x931a9(%rip), %rdi       ; swiftTest.piString : Swift.String
    0x10023c02f <+863>:  movq   0x931aa(%rip), %rsi       ; swiftTest.piString : Swift.String + 8
    0x10023c036 <+870>:  movq   0x931ab(%rip), %rax       ; swiftTest.piString : Swift.String + 16
    0x10023c03d <+877>:  movq   %rdi, -0xd8(%rbp)
    0x10023c044 <+884>:  movq   %rax, %rdi
    0x10023c047 <+887>:  movq   %rsi, -0xe0(%rbp)
    0x10023c04e <+894>:  movq   %rax, -0xe8(%rbp)
    0x10023c055 <+901>:  callq  0x100268160               ; swift_unknownRetain
    0x10023c05a <+906>:  movq   -0xd8(%rbp), %rdi
    0x10023c061 <+913>:  movq   -0xe0(%rbp), %rsi
    0x10023c068 <+920>:  movq   -0xe8(%rbp), %rdx
    0x10023c06f <+927>:  callq  0x1000470d0               ; Swift.String.init (Swift.String.Type)(stringInterpolationSegment : Swift.String) -> Swift.String
    0x10023c074 <+932>:  leaq   0x40d15(%rip), %rdi       ; ""
    0x10023c07b <+939>:  xorl   %r9d, %r9d
    0x10023c07e <+942>:  movl   %r9d, %esi
    0x10023c081 <+945>:  movl   $0x1, %r9d
    0x10023c087 <+951>:  movq   -0xc8(%rbp), %r8
    0x10023c08e <+958>:  movq   %rax, 0x18(%r8)
    0x10023c092 <+962>:  movq   %rdx, 0x20(%r8)
    0x10023c096 <+966>:  movq   %rcx, 0x28(%r8)
    0x10023c09a <+970>:  movl   %r9d, %edx
    0x10023c09d <+973>:  callq  0x100001aa0               ; Swift.String.init (Swift.String.Type)(_builtinStringLiteral : Builtin.RawPointer, byteSize : Builtin.Word, isASCII : Builtin.Int1) -> Swift.String
    0x10023c0a2 <+978>:  movq   %rax, %rdi
    0x10023c0a5 <+981>:  movq   %rdx, %rsi
    0x10023c0a8 <+984>:  movq   %rcx, %rdx
    0x10023c0ab <+987>:  callq  0x1000470d0               ; Swift.String.init (Swift.String.Type)(stringInterpolationSegment : Swift.String) -> Swift.String
    0x10023c0b0 <+992>:  movq   -0xc8(%rbp), %rsi
    0x10023c0b7 <+999>:  movq   %rax, 0x30(%rsi)
    0x10023c0bb <+1003>: movq   %rdx, 0x38(%rsi)
    0x10023c0bf <+1007>: movq   %rcx, 0x40(%rsi)
    0x10023c0c3 <+1011>: movq   -0xd0(%rbp), %rdi
    0x10023c0ca <+1018>: callq  0x1000470c0               ; Swift.String.init (Swift.String.Type)(stringInterpolation : Swift.Array<Swift.String>...) -> Swift.String
    0x10023c0cf <+1023>: movq   -0xc0(%rbp), %rsi
    0x10023c0d6 <+1030>: movq   %rax, (%rsi)
    0x10023c0d9 <+1033>: movq   %rdx, 0x8(%rsi)
    0x10023c0dd <+1037>: movq   %rcx, 0x10(%rsi)
    0x10023c0e1 <+1041>: callq  0x10012aa70               ; Swift.(print (Swift.Array<protocol<>>, separator : Swift.String, terminator : Swift.String) -> ()).(default argument 1)
    0x10023c0e6 <+1046>: movq   %rax, -0xf0(%rbp)
    0x10023c0ed <+1053>: movq   %rdx, -0xf8(%rbp)
    0x10023c0f4 <+1060>: movq   %rcx, -0x100(%rbp)
    0x10023c0fb <+1067>: callq  0x10012aa90               ; Swift.(print (Swift.Array<protocol<>>, separator : Swift.String, terminator : Swift.String) -> ()).(default argument 2)
    0x10023c100 <+1072>: movq   -0xb8(%rbp), %rdi
    0x10023c107 <+1079>: movq   -0xf0(%rbp), %rsi
    0x10023c10e <+1086>: movq   -0xf8(%rbp), %r8
    0x10023c115 <+1093>: movq   %rdx, -0x108(%rbp)
    0x10023c11c <+1100>: movq   %r8, %rdx
    0x10023c11f <+1103>: movq   -0x100(%rbp), %r10
    0x10023c126 <+1110>: movq   %rcx, -0x110(%rbp)
    0x10023c12d <+1117>: movq   %r10, %rcx
    0x10023c130 <+1120>: movq   %rax, %r8
    0x10023c133 <+1123>: movq   -0x108(%rbp), %r9
    0x10023c13a <+1130>: movq   -0x110(%rbp), %rax
    0x10023c141 <+1137>: movq   %rax, (%rsp)
    0x10023c145 <+1141>: callq  0x10012aab0               ; Swift.print (Swift.Array<protocol<>>, separator : Swift.String, terminator : Swift.String) -> ()
    0x10023c14a <+1146>: xorl   %eax, %eax
    0x10023c14c <+1148>: addq   $0x120, %rsp
    0x10023c153 <+1155>: popq   %rbp
    0x10023c154 <+1156>: retq   
    0x10023c155 <+1157>: ud2    
    0x10023c157 <+1159>: nopw   (%rax,%rax)

Can I estimate time consumption as below?

The randomNumber function consists of about 20 instructions, hence the calculations of x and y consist of about 40 instructions. Incrementing pointInsideCount executes only a few instructions, so each iteration of the for loop runs about 4 to 5 dozen instructions (assume it is 50). Time consumption outside of the for loop can be ignored.

If I assume the 4650U runs 2 instructions per cycle on average in this program, then when the loop count is 10^8, the whole time consumption is about 50 * 10^8 / (1.7 * 10^9 * 2) ≈ 1.5 seconds.


Solution

  • You can't just assume the same IPC for all loops. Sure that loop runs 2 IPC, but that doesn't tell you anything about other loops. You have to analyse the code carefully to find the bottlenecks and the amount of parallelism.


    If you can safely assume no cache-misses or branch-mispredicts, you can get reasonable cycle-count estimates for small loops for specific Intel microarchitectures by using IACA, Intel's static code analyser. It is far from a full cycle-accurate simulation of real hardware, but it does have its own model for distributing uops to ports. It usually gets sensible numbers.

    You can also do the same sort of analysis by hand (including for CPUs that IACA doesn't know about) using Agner Fog's instruction tables and microarchitecture guides.

    Things usually work out quite accurately when a loop is bottlenecked on the latency of a loop-carried dependency chain, or on saturating only one execution port.

    At high throughputs, there are many subtle effects that can bottleneck code that you'd hope could run at 4 fused-domain uops per clock. The frontend can only sustain that for quite small loops (~28 or 56 uops), because even the uop cache has limited throughput because of uop-cache-line boundaries and uops not being in groups of 4.

    Significant FMA performance anomaly experienced in the Intel Broadwell processor is a good example of how things can get really hard to understand. You'd expect the code to just saturate all three vector execution ports, which it does on Haswell, and nearly on Skylake, but not even close on Broadwell. And that's not even a front-end bottleneck, since the loop is small enough to fit in the loop buffer.

    Again, all of this is without considering branch mispredicts or cache misses.

    If this all sounds really hard and complicated, that's because it is. This is why benchmarks are more useful than static analysis. However, microbenchmarks are really easy to get wrong. You should look at asm to make sure you didn't screw up and let the compiler optimize away the thing you wanted to test. You also need to understand a lot about how CPUs work to avoid pitfalls, like putting something else slow into your microbenchmark, and having that dominate the run-time instead of the thing you wanted to test.