Search code examples
perlclone

Custom Storable hooks for dclone-ing a light-weight object referencing a heavy-weight object


Say I have a tiny object that has a reference to a huge object:

package Tiny;

# Constructor for a Tiny object.
#
#   $class - class name (or invocant) the new object is blessed into
#   $tiny  - the light-weight payload
#   $large - reference to the heavy-weight object that is kept alongside it
#
# Returns the newly blessed hash-based object.
sub new {
    my ($class, $tiny, $large) = @_;

    # Two-argument bless: one-argument bless would ignore $class and bless
    # into whichever package this sub was compiled in, breaking subclasses.
    return bless { tiny => $tiny, large => $large }, $class;
}

I'd like to create a STORABLE_freeze/STORABLE_thaw pair that lets me (recursively) clone $tiny but maintain/keep the reference to $large as-is without cloning $large too.

I tried temporarily deleting $self->{large} (see below), stashing a weak reference to $large in a hash keyed by its Scalar::Util::refaddr, and serializing the rest of $self. I then put the reference back into the original object immediately, and into the cloned one in STORABLE_thaw. But it is a mess: I need a global class-level hash (%largeWeakRefs) to hold the temporary $large reference, and on every clone the weak ref value gets cleared when $large goes out of scope while its key remains in the hash forever, leaking memory. It has a smell.

How is it possible to do this in a cleaner way?

Here is my solution using the hash to hold the large ref temporarily:

package Tiny;

use Scalar::Util qw(refaddr weaken);

# Build a Tiny object from a small payload and a reference to a large one.
# The invocant is the class to bless into; the new object is returned.
sub new {
    my $class = shift;
    my ( $tiny, $large ) = @_;

    my %fields = ( tiny => $tiny, large => $large );
    return bless \%fields, $class;
}

# Side table that carries the "large" reference from STORABLE_freeze over to
# STORABLE_thaw. Keys are refaddr values; the stored references are weakened.
# Nothing in this sub ever removes a key again.
my %largeWeakRefs;

# Storable freeze hook.
#
#   $self    - the object being serialized
#   $cloning - flag supplied by Storable (not consulted here)
#
# Returns the refaddr of $self->{large} as the serialized string, followed by
# a reference to a shallow copy of every other field of $self.
sub STORABLE_freeze {
    my ( $self, $cloning ) = @_;

    # Pull "large" out of the object so the copy below does not include it.
    my $large = delete local $self->{large};

    # Remember it (weakly) under its address so the thaw side can find it.
    my $key = refaddr $large;
    weaken( $largeWeakRefs{$key} = $large );

    my $rest = { %$self };

    # Put "large" back into the original object.
    $self->{large} = $large;

    return ( $key, $rest );
}

# Storable thaw hook, counterpart of STORABLE_freeze.
#
#   $self       - the empty object to populate
#   $cloning    - flag supplied by Storable (not consulted here)
#   $refaddr    - serialized string from freeze: the key into %largeWeakRefs
#   $restOfSelf - hash ref holding every field except "large"
#
# Replaces the contents of $self with the thawed fields plus the "large"
# reference looked up in %largeWeakRefs, and returns $self.
sub STORABLE_thaw {
    my $self = shift;
    my ( $cloning, $refaddr, $restOfSelf ) = @_;

    %{$self} = ( %{$restOfSelf}, large => $largeWeakRefs{$refaddr} );

    return $self;
}

(Yes I know, my example only handles cloning, not straight-up freeze and thaw)


Solution

  • You could add reference counts.

    # Side table used while cloning. Keyed by the packed refaddr of a "large"
    # object; each value is [ $large, $count ], where $count is incremented
    # once per freeze below.
    my %larges;

    # Storable freeze hook.
    #
    #   $self    - the object being serialized
    #   $cloning - true when Storable is cloning (dclone) rather than storing
    #
    # When cloning, returns ( $large_key, $self->{tiny} ): "large" itself is
    # not handed to Storable but parked in %larges under $large_key.
    # Otherwise returns ( "", $self->{tiny}, $self->{large} ).
    # NOTE(review): Storable expects the values after the first to be
    # references - assumes "tiny" is a reference; confirm against callers.
    sub STORABLE_freeze {
       my ( $self, $cloning ) = @_;
       if ($cloning) {
          # Fixed: this used to read "self->{large}" (missing sigil), which
          # never looked at the object's own field.
          my $large_key = pack('j', refaddr($self->{large}));
          $larges{$large_key} //= [ $self->{large}, 0 ];
          ++$larges{$large_key}[1];
          return ( $large_key, $self->{tiny} );
       } else {
          return ( "", $self->{tiny}, $self->{large} );
       }
    }
    
    # Storable thaw hook, counterpart of the reference-counting freeze hook.
    #
    #   $self       - the empty object to populate
    #   $cloning    - true when Storable is cloning
    #   $serialized - first value returned by freeze (the %larges key when
    #                 cloning)
    #   remaining   - "tiny", and when not cloning also "large"
    #
    # When cloning, "large" is fetched from %larges and the entry's counter is
    # decremented; the entry is dropped once the counter reaches zero.
    sub STORABLE_thaw {
       my $self       = shift;
       my $cloning    = shift;
       my $serialized = shift;

       # "tiny" is always the next argument, whichever mode we are in.
       $self->{tiny} = shift;

       if ( !$cloning ) {
          $self->{large} = shift;
       }
       else {
          $self->{large} = $larges{$serialized}[0];
          delete $larges{$serialized}
             unless --$larges{$serialized}[1];
       }
    }
    

    Untested.

    If the cloning process dies, you'll have a memory leak.


    Alternatively, you could avoid the need for external resources as follows:

    use Inline C => <<'__EOS__';
    
       /*
        * Takes a Perl reference and returns the address of the thing it
        * refers to as an integer. Croaks if the argument is not a reference.
        * The referent's reference count is incremented so that it stays
        * alive while only the number is being held.
        */
       IV get_numeric_ref(SV *sv) {
          SvGETMAGIC(sv);
          if (!SvROK(sv))
             croak("Argument not a reference");
    
          sv = MUTABLE_SV(SvRV(sv));
          SvREFCNT_inc(sv);
          return PTR2IV(sv);   /* pointer -> integer */
       }
    
       /*
        * Inverse of get_numeric_ref: turns the integer back into a Perl
        * reference to the same referent. newRV_noinc does not bump the
        * reference count, so the new reference takes over the count that
        * get_numeric_ref added.
        */
       SV* get_perl_ref_from_numeric_ref(IV iv) {
          /* Fixed: integer -> pointer needs INT2PTR; PTR2IV goes the other
             way and leaves an integer being assigned to an SV pointer. */
          SV* sv = INT2PTR(SV*, iv);
          return newRV_noinc(sv);
       }
    
    __EOS__
    
    # Storable freeze hook for the Inline C variant.
    #
    #   $self    - the object being serialized
    #   $cloning - true when Storable is cloning
    #
    # When cloning, the serialized string is the packed numeric address
    # obtained from get_numeric_ref for "large", followed by "tiny".
    # Otherwise returns ( "", tiny, large ).
    sub STORABLE_freeze {
       my $self    = shift;
       my $cloning = shift;

       return ( "", $self->{tiny}, $self->{large} )
          unless $cloning;

       my $address = get_numeric_ref( $self->{large} );
       return ( pack( 'j', $address ), $self->{tiny} );
    }
    
    # Storable thaw hook for the Inline C variant.
    #
    #   $self       - the empty object to populate
    #   $cloning    - true when Storable is cloning
    #   $serialized - first value returned by freeze (packed address when
    #                 cloning)
    #   remaining   - "tiny", and when not cloning also "large"
    #
    # When cloning, "large" is rebuilt from the packed address via
    # get_perl_ref_from_numeric_ref instead of being deserialized.
    sub STORABLE_thaw {
       my $self       = shift;
       my $cloning    = shift;
       my $serialized = shift;

       # "tiny" is always the next argument, whichever mode we are in.
       $self->{tiny} = shift;

       if ($cloning) {
          $self->{large}
             = get_perl_ref_from_numeric_ref( unpack 'j', $serialized );
       }
       else {
          $self->{large} = shift;
       }
    }
    

    Didn't test STORABLE_freeze and STORABLE_thaw, but tested the C/XS code using the following:

    use strict;
    use warnings;
    use feature qw( say state );
    
    use Cpanel::JSON::XS qw( );
    
    # Serialize one value to JSON text using a single shared encoder that is
    # configured with canonical and allow_nonref. Returns the encoded string.
    sub _dump {
       my ($data) = @_;
       state $json = Cpanel::JSON::XS->new->canonical->allow_nonref;
       return $json->encode($data);
    }
    
    # Round-trip demonstration for get_numeric_ref and
    # get_perl_ref_from_numeric_ref. Each trailing comment is the output
    # recorded for that line: the address stays the same throughout, and the
    # reference count of %h goes from 1 to 2 and then stays at 2.
    {
       my %h = ( a => 4, b => 5 );
       say _dump(\%h);                                # {"a":4,"b":5}
       say sprintf "0x%x", \%h;                       # 0x32cdbf8
       say Internals::SvREFCNT(%h);                   # 1
       # Reference -> integer address; this step takes an extra count on %h.
       my $i = get_numeric_ref(\%h);
       say sprintf "0x%x", $i;                        # 0x32cdbf8
       say Internals::SvREFCNT(%h);                   # 2
       # Integer address -> reference; the count is not incremented again.
       my $ref = get_perl_ref_from_numeric_ref($i);
       say sprintf "0x%x", $ref;                      # 0x32cdbf8
       say Internals::SvREFCNT(%h);                   # 2
       # The rebuilt reference sees the same data as the original hash.
       say _dump($ref);                               # {"a":4,"b":5}
    }
    

    If the cloning process dies, you'll have a memory leak. I suppose it would be safe to rely on "large" not going anywhere during the cloning process, so you could remove the SvREFCNT_inc and change newRV_noinc to newRV to avoid the potential memory leak.


    To avoid the possible memory leak, never store "large" in the object.

    my %larges;
    
    # Constructor: blesses an empty hash into the invocant's class and hands
    # the remaining arguments to _init, whose return value is returned.
    sub new {
       my ( $class, @args ) = @_;
       my $self = bless {}, $class;
       return $self->_init(@args);
    }
    
    # Populate an object.
    #
    #   $self  - the object to initialize
    #   $tiny  - stored directly in the object
    #   $large - NOT stored in the object; kept in %larges under a key derived
    #            from the object's own address
    #
    # Returns $self.
    sub _init {
       my $self = shift;
       my ( $tiny, $large ) = @_;

       # The key is this object's packed refaddr.
       my $large_key = pack( 'j', refaddr($self) );

       $self->{tiny}       = $tiny;
       $self->{large_key}  = $large_key;
       $larges{$large_key} = $large;

       return $self;
    }
    
    # Destructor: removes this object's entry from %larges, if the object
    # has a large_key recorded.
    sub DESTROY {
       my $self      = shift;
       my $large_key = $self->{large_key};
       delete $larges{$large_key} if defined $large_key;
    }
    
    # Storable freeze hook for the variant that keeps "large" in %larges.
    #
    #   $self    - the object being serialized
    #   $cloning - true when Storable is cloning
    #
    # When cloning, returns ( large_key, tiny ) so "large" is never handed to
    # Storable. Otherwise returns ( "", tiny, large ), with "large" looked up
    # in %larges.
    sub STORABLE_freeze {
       my $self    = shift;
       my $cloning = shift;

       my $tiny      = $self->{tiny};
       my $large_key = $self->{large_key};

       return $cloning
          ? ( $large_key, $tiny )
          : ( "", $tiny, $larges{$large_key} );
    }
    
    # Storable thaw hook for the variant that keeps "large" in %larges.
    #
    #   $self       - the empty object to populate
    #   $cloning    - true when Storable is cloning
    #   $serialized - first value returned by freeze (the source object's
    #                 large_key when cloning)
    #   remaining   - "tiny", and when not cloning also "large"
    #
    # Both paths finish through _init, which registers "large" in %larges
    # under a key for this new object.
    sub STORABLE_thaw {
       my $self       = shift;
       my $cloning    = shift;
       my $serialized = shift;

       # Not cloning: the remaining arguments are passed to _init unchanged.
       return $self->_init(@_) unless $cloning;

       # Cloning: reuse the "large" registered under the source object's key.
       my $tiny = shift;
       return $self->_init( $tiny, $larges{$serialized} );
    }
    

    Untested.

    No memory leaks if the cloning process dies.