perl language-agnostic refactoring tail-recursion tail-call

How do I refactor a recursion occurring in a for loop to make it a tail call?

Consider the recursive subroutine append_until_exhausted. The recursion occurs in the middle of the body. I want to place it at the end for further processing, that is to say a simple tail call (without any optimisation, which in Perl typically involves a goto). You can change anything but the signature of the subroutine and the two helper subroutines.

The algorithms involving numerics look stupid because are a condensation/obfuscation of my real code, but the code execution path/structure of subroutine calls is unchanged.

use 5.032;
use strictures;
use experimental qw(signatures);

# Returns mostly one value, sometimes multiple,
# and an occasional end condition which will cause
# the recursion to end because then the for loop will
# iterate over an empty list.
# This sub is also called from elsewhere,
# do not change, do not inline.
sub some_complicated_computation($foo) { # → ArrayRef[$foo]
    return [] if $foo > 45;
    return $foo % 5
        ? [$foo + 1]
        : [$foo + 2, $foo + 3];
}

# do not inline
sub make_key($foo) { # → Str
    chr(64 + $foo / 5)
}

sub append_until_exhausted($foo, $appendix) { # → HashRef[ArrayRef[$foo]]
    my $computed = some_complicated_computation($foo);
    for my $new_foo ($computed->@*) {
        {
            push $appendix->{make_key $new_foo}->@*, $new_foo;
        }
        __SUB__->($new_foo, $appendix);
    }
    return $appendix;
}

my $new_appendix = append_until_exhausted(
    7, # start value for foo
    { dummy => [], dummy2 => [], dummy3 => [], }
);

The goal here is for me to understand the principle so I can apply it in similar situations and in similar languages. It does not help if you suggest some {Sub::*, B::*, XS} magic.

Solution

Let's start with a simple example.

sub fact($n) {
   return 1 if $n == 0;
   return $n * fact($n-1);
}

To make something tail-recursive, you need to pass the information needed to perform the tail operation along with the call.

sub _fact($n, $acc) {
   return $acc if $n == 0;
   return _fact($n-1, $n * $acc);
}

sub fact($n) {
   return _fact($n, 1);
}

This particular solution relies on the fact that multiplication is commutative. (We replaced 1*2*3*4 with 1*4*3*2.) So we still need a generic approach.

A generic approach would involve passing the tail as a callback. This means that

if (TERMINAL_COND())
   return TERMINAL_VALUE();
} else {
   return TAIL(recursive(HEAD()))
}

becomes

# Extra argument $tail
if (TERMINAL_COND()) {
   return $tail->(TERMINAL_VALUE());   # Tail call
} else {
   return recursive(HEAD(), sub {      # Tail call
      return $tail->(TAIL($_[0]);      # Tail call
   });
}

This gives us the following:

sub _fact($n, $tail) {
   return $tail->(1) if $n == 0;
   return _fact($n-1, sub($fact) {
      return $tail->( $fact * $n );
   });
}

sub fact($n) {
   return _fact($n, sub($fact) { $fact });
}

This is basically how Promises work.

# Promise is a fictional class akin
# to the JS one with the same name.

sub fact_p($n) {
   return Promise->new(1) if $n == 0;
   return fact_p($n-1)->then(sub($fact) {
      return $fact * $n;
   });
}

fact_p($n)->done(sub($fact) {
   say $fact;
});

What you have is a lot trickier because you have multiple recursive calls. But we can still apply the same technique.

# Loop body
sub __append_until_exhausted($appendix, $computed, $i, $tail) {
   if ($i == $computed->@*) {
      return $tail->();  # TC
   } else {
      my $new_foo = $computed->[$i];
      push $appendix->{make_key $new_foo}->@*, $new_foo;
      return _append_until_exhausted($appendix, $new_foo, sub {  # TC
         return __append_until_exhausted($appendix, $computed, $i+1, $tail);  # TC
      });
   }
}

# Function body
sub _append_until_exhausted($appendix, $foo, $tail) {
   my $computed = some_complicated_computation($foo);
   return __append_until_exhausted($appendix, $computed, 0, $tail);  # TC
}

# Public interface
sub append_until_exhausted($appendix, $foo) {
   return _append_until_exhausted($appendix, $foo, sub {  # TC
      return $appendix;
   });
}

We can avoid all the extra copies of $appendix as follows:

sub append_until_exhausted($appendix, $foo) {
   local *helper2 = sub($computed, $i, $tail) {
      if ($i == $computed->@*) {
         return $tail->();  # TC
      } else {
         my $new_foo = $computed->[$i];
         push $appendix->{make_key $new_foo}->@*, $new_foo;
         return helper1($new_foo, sub {  # TC
            return helper2($computed, $i+1, $tail);  # TC
         });
      }
   };

   local *helper1 = sub($foo, $tail) {
      my $computed = some_complicated_computation($foo);
      return helper2($computed, 0, $tail);  # TC
   };

   return helper1($foo, sub {  # TC
      return $appendix;
   });
}

Perl doesn't perform tail-call elimination, and function calls are rather slow. You'd be better off using an array as a stack.

This performs the work in the same order as the original:

sub append_until_exhausted($foo, $appendix) {
   my @todo = [ $foo, undef, 0 ];
   while (@todo) {
      my $todo = $todo[-1];
      \my ( $foo, $computed, $i ) = \( @$todo );
      $computed //= some_complicated_computation($foo);
      if ($i == $computed->@*) {
         pop(@todo);
         next;
      }

      my $new_foo = $computed->[$i++];
      push $appendix->{make_key $new_foo}->@*, $new_foo;
      push @todo, [ $new_foo, undef, 0 ];
   }

   return $appendix;
}

If you don't mind doing the complicated computation out of order (while still preserving the result), the above simplifies to the following:

sub append_until_exhausted($foo, $appendix) {
   my @todo = some_complicated_computation($foo);
   while (@todo) {
      my $computed = $todo[-1];
      if (!$computed->@*) {
         pop(@todo);
         next;
      }

      my $new_foo = shift(@$computed);
      push $appendix->{make_key $new_foo}->@*, $new_foo;
      push @todo, some_complicated_computation($new_foo);
   }

   return $appendix;
}