Search code examples
perllanguage-agnosticrefactoringtail-recursiontail-call

How do I refactor a recursion occurring in a for loop to make it a tail call?


Consider the recursive subroutine append_until_exhausted. The recursion occurs in the middle of the body. I want to place it at the end for further processing, that is to say a simple tail call (without any optimisation, which in Perl typically involves a goto). You can change anything but the signature of the subroutine and the two helper subroutines.

The algorithms involving numerics look stupid because are a condensation/obfuscation of my real code, but the code execution path/structure of subroutine calls is unchanged.

use 5.032;
use strictures;
use experimental qw(signatures);

# Returns mostly one value, sometimes multiple,
# and an occasional end condition which will cause
# the recursion to end because then the for loop will
# iterate over an empty list.
# This sub is also called from elsewhere,
# do not change, do not inline.
sub some_complicated_computation($foo) { # → ArrayRef[$foo]
    return [] if $foo > 45;
    return $foo % 5
        ? [$foo + 1]
        : [$foo + 2, $foo + 3];
}

# do not inline
sub make_key($foo) { # → Str
    chr(64 + $foo / 5)
}

sub append_until_exhausted($foo, $appendix) { # → HashRef[ArrayRef[$foo]]
    my $computed = some_complicated_computation($foo);
    for my $new_foo ($computed->@*) {
        {
            push $appendix->{make_key $new_foo}->@*, $new_foo;
        }
        __SUB__->($new_foo, $appendix);
    }
    return $appendix;
}

my $new_appendix = append_until_exhausted(
    7, # start value for foo
    { dummy => [], dummy2 => [], dummy3 => [], }
);

The goal here is for me to understand the principle so I can apply it in similar situations and in similar languages. It does not help if you suggest some {Sub::*, B::*, XS} magic.


Solution

  • Let's start with a simple example.

    sub fact($n) {
       return 1 if $n == 0;
       return $n * fact($n-1);
    }
    

    To make something tail-recursive, you need to pass the information needed to perform the tail operation along with the call.

    sub _fact($n, $acc) {
       return $acc if $n == 0;
       return _fact($n-1, $n * $acc);
    }
    
    sub fact($n) {
       return _fact($n, 1);
    }
    

    This particular solution relies on the fact that multiplication is commutative. (We replaced 1*2*3*4 with 1*4*3*2.) So we still need a generic approach.


    A generic approach would involve passing the tail as a callback. This means that

    if (TERMINAL_COND())
       return TERMINAL_VALUE();
    } else {
       return TAIL(recursive(HEAD()))
    }
    

    becomes

    # Extra argument $tail
    if (TERMINAL_COND()) {
       return $tail->(TERMINAL_VALUE());   # Tail call
    } else {
       return recursive(HEAD(), sub {      # Tail call
          return $tail->(TAIL($_[0]);      # Tail call
       });
    }
    

    This gives us the following:

    sub _fact($n, $tail) {
       return $tail->(1) if $n == 0;
       return _fact($n-1, sub($fact) {
          return $tail->( $fact * $n );
       });
    }
    
    sub fact($n) {
       return _fact($n, sub($fact) { $fact });
    }
    

    This is basically how Promises work.

    # Promise is a fictional class akin
    # to the JS one with the same name.
    
    sub fact_p($n) {
       return Promise->new(1) if $n == 0;
       return fact_p($n-1)->then(sub($fact) {
          return $fact * $n;
       });
    }
    
    fact_p($n)->done(sub($fact) {
       say $fact;
    });
    

    What you have is a lot trickier because you have multiple recursive calls. But we can still apply the same technique.

    # Loop body
    sub __append_until_exhausted($appendix, $computed, $i, $tail) {
       if ($i == $computed->@*) {
          return $tail->();  # TC
       } else {
          my $new_foo = $computed->[$i];
          push $appendix->{make_key $new_foo}->@*, $new_foo;
          return _append_until_exhausted($appendix, $new_foo, sub {  # TC
             return __append_until_exhausted($appendix, $computed, $i+1, $tail);  # TC
          });
       }
    }
    
    # Function body
    sub _append_until_exhausted($appendix, $foo, $tail) {
       my $computed = some_complicated_computation($foo);
       return __append_until_exhausted($appendix, $computed, 0, $tail);  # TC
    }
    
    # Public interface
    sub append_until_exhausted($appendix, $foo) {
       return _append_until_exhausted($appendix, $foo, sub {  # TC
          return $appendix;
       });
    }
    

    We can avoid all the extra copies of $appendix as follows:

    sub append_until_exhausted($appendix, $foo) {
       local *helper2 = sub($computed, $i, $tail) {
          if ($i == $computed->@*) {
             return $tail->();  # TC
          } else {
             my $new_foo = $computed->[$i];
             push $appendix->{make_key $new_foo}->@*, $new_foo;
             return helper1($new_foo, sub {  # TC
                return helper2($computed, $i+1, $tail);  # TC
             });
          }
       };
    
       local *helper1 = sub($foo, $tail) {
          my $computed = some_complicated_computation($foo);
          return helper2($computed, 0, $tail);  # TC
       };
    
       return helper1($foo, sub {  # TC
          return $appendix;
       });
    }
    

    Perl doesn't perform tail-call elimination, and function calls are rather slow. You'd be better off using an array as a stack.

    This performs the work in the same order as the original:

    sub append_until_exhausted($foo, $appendix) {
       my @todo = [ $foo, undef, 0 ];
       while (@todo) {
          my $todo = $todo[-1];
          \my ( $foo, $computed, $i ) = \( @$todo );
          $computed //= some_complicated_computation($foo);
          if ($i == $computed->@*) {
             pop(@todo);
             next;
          }
    
          my $new_foo = $computed->[$i++];
          push $appendix->{make_key $new_foo}->@*, $new_foo;
          push @todo, [ $new_foo, undef, 0 ];
       }
    
       return $appendix;
    }
    

    If you don't mind doing the complicated computation out of order (while still preserving the result), the above simplifies to the following:

    sub append_until_exhausted($foo, $appendix) {
       my @todo = some_complicated_computation($foo);
       while (@todo) {
          my $computed = $todo[-1];
          if (!$computed->@*) {
             pop(@todo);
             next;
          }
    
          my $new_foo = shift(@$computed);
          push $appendix->{make_key $new_foo}->@*, $new_foo;
          push @todo, some_complicated_computation($new_foo);
       }
    
       return $appendix;
    }