Search code examples
for-loopwhile-looprustmemmove

Which is faster for reverse iteration, for or while loops?


I am trying to implement the standard memmove function in Rust and I was wondering which method is faster for downwards iteration (where src < dest):

for i in (0..n).rev() {
    //Do copying
}

or

let mut i = n;
while i != 0 {
    i -= 1;
    // Do copying
}

Will the rev() in the for loops version significantly slow it down?


Solution

  • TL;DR: Use the for loop.


    Both should be equally fast. We can check the compiler's ability to peel away the layers of abstraction involved in the for loop quite simply:

    #[inline(never)]
    fn blackhole() {}
    
    #[inline(never)]
    fn with_for(n: usize) {
        for i in (0..n).rev() { blackhole(); }
    }
    
    #[inline(never)]
    fn with_while(n: usize) {
        let mut i = n;
        while i > 0 {
            blackhole();
            i -= 1;
        }
    }
    

    This generates this LLVM IR:

    ; Function Attrs: noinline nounwind readnone uwtable
    define internal void @_ZN8with_for20h645c385965fcce1fhaaE(i64) unnamed_addr #0 {
    entry-block:
      ret void
    }
    
    ; Function Attrs: noinline nounwind readnone uwtable
    define internal void @_ZN10with_while20hc09c3331764a9434yaaE(i64) unnamed_addr #0 {
    entry-block:
      ret void
    }
    

    Even if you are not versed in LLVM, it is obvious that both functions compiled down to the same IR (and thus obviously to the same assembly).

    Since their performance is the same, one should prefer the more explicit for loop and reserve the while loop to cases where the iteration is irregular.


    EDIT: to address starblue's concern of unfitness.

    #[link(name = "snappy")]
    extern {
        fn blackhole(i: libc::c_int) -> libc::c_int;
    }
    
    #[inline(never)]
    fn with_for(n: i32) {
        for i in (0..n).rev() { unsafe { blackhole(i as libc::c_int); } }
    }
    
    #[inline(never)]
    fn with_while(n: i32) {
        let mut i = n;
        while i > 0 {
            unsafe { blackhole(i as libc::c_int); }
            i -= 1;
        }
    }
    

    compiles down to:

    ; Function Attrs: noinline nounwind uwtable
    define internal void @_ZN8with_for20h7cf06f33e247fa35maaE(i32) unnamed_addr #1 {
    entry-block:
      %1 = icmp sgt i32 %0, 0
      br i1 %1, label %match_case.preheader, label %clean_ast_95_
    
    match_case.preheader:                             ; preds = %entry-block
      br label %match_case
    
    match_case:                                       ; preds = %match_case.preheader, %match_case
      %.in = phi i32 [ %2, %match_case ], [ %0, %match_case.preheader ]
      %2 = add i32 %.in, -1
      %3 = tail call i32 @blackhole(i32 %2)
      %4 = icmp sgt i32 %2, 0
      br i1 %4, label %match_case, label %clean_ast_95_.loopexit
    
    clean_ast_95_.loopexit:                           ; preds = %match_case
      br label %clean_ast_95_
    
    clean_ast_95_:                                    ; preds = %clean_ast_95_.loopexit, %entry-block
      ret void
    }
    
    ; Function Attrs: noinline nounwind uwtable
    define internal void @_ZN10with_while20hee8edd624cfe9293IaaE(i32) unnamed_addr #1 {
    entry-block:
      %1 = icmp sgt i32 %0, 0
      br i1 %1, label %while_body.preheader, label %while_exit
    
    while_body.preheader:                             ; preds = %entry-block
      br label %while_body
    
    while_exit.loopexit:                              ; preds = %while_body
      br label %while_exit
    
    while_exit:                                       ; preds = %while_exit.loopexit, %entry-block
      ret void
    
    while_body:                                       ; preds = %while_body.preheader, %while_body
      %i.05 = phi i32 [ %3, %while_body ], [ %0, %while_body.preheader ]
      %2 = tail call i32 @blackhole(i32 %i.05)
      %3 = add nsw i32 %i.05, -1
      %4 = icmp sgt i32 %i.05, 1
      br i1 %4, label %while_body, label %while_exit.loopexit
    }
    

    The core loops are:

    ; -- for loop
    match_case:                                       ; preds = %match_case.preheader, %match_case
      %.in = phi i32 [ %2, %match_case ], [ %0, %match_case.preheader ]
      %2 = add i32 %.in, -1
      %3 = tail call i32 @blackhole(i32 %2)
      %4 = icmp sgt i32 %2, 0
      br i1 %4, label %match_case, label %clean_ast_95_.loopexit
    
    ; -- while loop
    while_body:                                       ; preds = %while_body.preheader, %while_body
      %i.05 = phi i32 [ %3, %while_body ], [ %0, %while_body.preheader ]
      %2 = tail call i32 @blackhole(i32 %i.05)
      %3 = add nsw i32 %i.05, -1
      %4 = icmp sgt i32 %i.05, 1
      br i1 %4, label %while_body, label %while_exit.loopexit
    

    And the only difference is that:

    • for decrements before calling blackhole, while decrements after
    • for compares against 0, while compares against 1

    otherwise, it's the same core loop.