Custom Storable hooks for dclone-ing a light-weight object referencing a heavy-weight object

Say I have a tiny object that has a reference to a huge object:

package Tiny;

# Constructor.
#   $class - class name to bless the new object into
#   $tiny  - the small payload
#   $large - reference to the heavy-weight object
# Returns a hash-based object with the fields "tiny" and "large".
sub new {
    my ($class, $tiny, $large) = @_;

    # Two-argument bless: honour the invocant so that subclasses calling
    # ->new get an object of their own class rather than of this package.
    return bless { tiny => $tiny, large => $large }, $class;
}

I'd like to create a STORABLE_freeze/STORABLE_thaw pair that lets me (recursively) clone $tiny but maintain/keep the reference to $large as-is without cloning $large too.

I tried temporarily deleting $self->{large} (see below): I put a weak reference to $large into a hash keyed by its Scalar::Util::refaddr, serialize the rest of $self, put the reference back into the original object immediately, and put it into the cloned object in STORABLE_thaw. But it is a mess: on every clone, the weak ref value gets deleted when it goes out of scope, yet the key remains in the hash forever, leaking memory, and I need a global class-level hash (%largeWeakRefs) to hold the temporary $large reference. It has a smell.

How is it possible to do this in a cleaner way?

Here is my solution using the hash to hold the large ref temporarily:

package Tiny;

use Scalar::Util qw(refaddr weaken);

# Constructor: wraps a small payload together with a reference to a large
# object.
sub new {
    my ( $class, $tiny, $large ) = @_;
    my %fields = ( tiny => $tiny, large => $large );
    return bless \%fields, $class;
}

# Hand-off table between the freeze and thaw hooks:
#   address of a "large" object => weak reference to that object.
# Nothing in this class ever removes a key from it.
my %largeWeakRefs;

# Storable freeze hook.
# Returns the address of the "large" object (used as the serialized string)
# followed by a reference to a shallow copy of every other field.
sub STORABLE_freeze {
    my ( $self, $cloning ) = @_;

    # Hide "large" while the remaining fields are copied.  `delete local`
    # puts the field back into $self when this sub returns.
    my $large      = delete local $self->{large};
    my %restOfSelf = %{$self};

    # Remember the large object under its address, weakly, so that this
    # table does not keep it alive.
    my $addr = refaddr($large);
    $largeWeakRefs{$addr} = $large;
    weaken( $largeWeakRefs{$addr} );

    return ( $addr, \%restOfSelf );
}

# Storable thaw hook.
# Fills the new object with the copied fields and re-attaches the "large"
# object looked up by the address that STORABLE_freeze handed over.
sub STORABLE_thaw {
    my ( $self, $cloning, $refaddr, $restOfSelf ) = @_;
    %{$self} = ( %{$restOfSelf}, large => $largeWeakRefs{$refaddr} );
    return $self;
}

(Yes I know, my example only handles cloning, not straight-up freeze and thaw)

Solution

You could add reference counts.

# Registry of "large" objects that are in transit between the freeze and
# thaw hooks during a clone:
#   packed address of the large object => [ $large, number of pending thaws ]
my %larges;

# Storable freeze hook.
#   $self    - the object being frozen
#   $cloning - true when called as part of a clone rather than a plain freeze
# When cloning, returns ( packed address of large, tiny ) and registers
# "large" in %larges so that STORABLE_thaw can re-attach it without copying.
# Otherwise returns ( "", tiny, large ) so that both get serialized.
# NOTE(review): Storable expects every value after the first to be a
# reference, so this presumably relies on "tiny" and "large" being
# references -- confirm against the callers.
sub STORABLE_freeze {
   my ( $self, $cloning ) = @_;
   if ($cloning) {
      # Key on the address of the large object itself ($self->{large}).
      my $large_key = pack('j', refaddr($self->{large}));
      $larges{$large_key} //= [ $self->{large}, 0 ];
      # One more thaw is now expected for this large object.
      ++$larges{$large_key}[1];
      return ( $large_key, $self->{tiny} );
   } else {
      return ( "", $self->{tiny}, $self->{large} );
   }
}

# Storable thaw hook, counterpart of the reference-counting STORABLE_freeze.
#   $self       - the new, still empty object to fill in
#   $cloning    - true when called as part of a clone
#   $serialized - first value returned by STORABLE_freeze
#   remaining   - the references returned by STORABLE_freeze
# When cloning, "large" is fetched from %larges under the key in $serialized
# and the registry entry is dropped once its pending count reaches zero.
sub STORABLE_thaw {
   my ( $self, $cloning, $serialized, @refs ) = @_;

   # "tiny" is always the first extra reference, in both modes.
   $self->{tiny} = shift @refs;

   if ($cloning) {
      $self->{large} = $larges{$serialized}[0];

      # One pending thaw fewer; forget the entry after the last one.
      --$larges{$serialized}[1]
         or delete $larges{$serialized};
   }
   else {
      $self->{large} = shift @refs;
   }
}

Untested.

If the cloning process dies, you'll have a memory leak.

Alternatively, you could avoid the need for external resources as follows:

use Inline C => <<'__EOS__';

   /*
    * Return the address of the thing that the reference `sv` points at,
    * as an integer (IV).  The referent's reference count is incremented
    * before the address is handed out.  Croaks when `sv` is not a
    * reference.
    */
   IV get_numeric_ref(SV *sv) {
      SV *referent;

      SvGETMAGIC(sv);
      if (!SvROK(sv)) {
         croak("Argument not a reference");
      }

      referent = MUTABLE_SV(SvRV(sv));
      SvREFCNT_inc(referent);
      return PTR2IV(referent);
   }

   /*
    * Build a new Perl reference to the SV whose address is given in `iv`.
    * newRV_noinc is used, so the referent's reference count is left
    * unchanged by this call.
    */
   SV* get_perl_ref_from_numeric_ref(IV iv) {
      /* INT2PTR is the integer-to-pointer conversion (PTR2IV goes the other way). */
      SV* sv = INT2PTR(SV*, iv);
      return newRV_noinc(sv);
   }

__EOS__

# Storable freeze hook (XS-helper variant).
# When cloning, returns ( packed result of get_numeric_ref for "large",
# tiny ), so "large" itself is not handed over for serialization.
# Otherwise returns ( "", tiny, large ).
sub STORABLE_freeze {
   my ( $self, $cloning ) = @_;

   # Plain freeze: hand over both fields.
   return ( "", $self->{tiny}, $self->{large} )
      if !$cloning;

   my $packed_address = pack( 'j', get_numeric_ref( $self->{large} ) );
   return ( $packed_address, $self->{tiny} );
}

# Storable thaw hook (XS-helper variant).
#   $self       - the new, still empty object to fill in
#   $cloning    - true when called as part of a clone
#   $serialized - first value returned by STORABLE_freeze
#   remaining   - the references returned by STORABLE_freeze
# When cloning, "large" is rebuilt from the packed value in $serialized via
# get_perl_ref_from_numeric_ref; otherwise it is the second extra reference.
sub STORABLE_thaw {
   my ( $self, $cloning, $serialized, @refs ) = @_;

   # "tiny" is always the first extra reference, in both modes.
   $self->{tiny} = shift @refs;

   if ($cloning) {
      $self->{large} = get_perl_ref_from_numeric_ref( unpack( 'j', $serialized ) );
   }
   else {
      $self->{large} = shift @refs;
   }
}

Didn't test STORABLE_freeze and STORABLE_thaw, but tested the C/XS code using the following:

use strict;
use warnings;
use feature qw( say state );

use Cpanel::JSON::XS qw( );

# Encode the given value with a Cpanel::JSON::XS encoder configured as
# canonical and allow_nonref; the encoder is created once and then reused.
sub _dump {
   my ($value) = @_;
   state $encoder = Cpanel::JSON::XS->new->canonical->allow_nonref;
   return $encoder->encode($value);
}

# Smoke test for get_numeric_ref / get_perl_ref_from_numeric_ref: turn a
# reference to %h into a number and back, printing the address and the
# reference count of %h after each step.  The trailing comments show the
# output the author recorded; the address is presumably specific to that run.
{
   my %h = ( a => 4, b => 5 );
   say _dump(\%h);                                # {"a":4,"b":5}
   say sprintf "0x%x", \%h;                       # 0x32cdbf8
   say Internals::SvREFCNT(%h);                   # 1
   # Reference -> number; the recorded count goes from 1 to 2 here.
   my $i = get_numeric_ref(\%h);
   say sprintf "0x%x", $i;                        # 0x32cdbf8
   say Internals::SvREFCNT(%h);                   # 2
   # Number -> reference; same address, and the recorded count stays at 2.
   my $ref = get_perl_ref_from_numeric_ref($i);
   say sprintf "0x%x", $ref;                      # 0x32cdbf8
   say Internals::SvREFCNT(%h);                   # 2
   # The rebuilt reference dumps the same data as the original hash.
   say _dump($ref);                               # {"a":4,"b":5}
}

If the cloning process dies, you'll have a memory leak. I suppose it would be safe to rely on "large" not going anywhere during the cloning process, so you could remove the SvREFCNT_inc and change newRV_noinc to newRV to avoid the potential memory leak.

To avoid the possible memory leak, never store "large" in the object.

# Side table holding the "large" object of every live instance, keyed by
# the packed address of the instance.  The instance itself only stores the
# key ("large_key"), never the large object.
my %larges;

# Constructor: blesses an empty hash into $class and lets _init fill it
# from the remaining arguments ( $tiny, $large ).
sub new {
   my ( $class, @args ) = @_;
   my $self = bless {}, $class;
   return $self->_init(@args);
}

# Initialise $self: store $tiny in the object, register $large in %larges
# under the packed address of $self and remember that key in the object.
# Returns $self.
sub _init {
   my ( $self, $tiny, $large ) = @_;

   my $large_key = pack( 'j', refaddr($self) );
   $larges{$large_key} = $large;

   $self->{tiny}      = $tiny;
   $self->{large_key} = $large_key;

   return $self;
}

# Destructor: drop this instance's entry from %larges, if it has a key.
sub DESTROY {
   my ($self) = @_;

   my $large_key = $self->{large_key};
   return if !defined $large_key;

   delete $larges{$large_key};
}

# Storable freeze hook (side-table variant).
# When cloning, returns ( large_key, tiny ): only the key travels, so the
# large object is not handed over for serialization.
# Otherwise returns ( "", tiny, large object looked up in %larges ).
sub STORABLE_freeze {
   my ( $self, $cloning ) = @_;

   my $large_key = $self->{large_key};

   return $cloning
      ? ( $large_key, $self->{tiny} )
      : ( "", $self->{tiny}, $larges{$large_key} );
}

# Storable thaw hook (side-table variant).
#   $self       - the new, still empty object to fill in
#   $cloning    - true when called as part of a clone
#   $serialized - first value returned by STORABLE_freeze
#   remaining   - the references returned by STORABLE_freeze
# When cloning, $serialized is the source object's key into %larges, and the
# new object is initialised with the cloned tiny plus that same large
# object.  Otherwise the extra references are passed to _init as they are.
sub STORABLE_thaw {
   my ( $self, $cloning, $serialized, @refs ) = @_;

   my @init_args = $cloning
      ? ( $refs[0], $larges{$serialized} )
      : @refs;

   return $self->_init(@init_args);
}

Untested.

No memory leaks if the cloning process dies.