Perl regex start match search after (skip) N char

Probably basic question, but can't find the answer: I have a pattern match regex where I'm looking for 'G', 187 char and 'G' again, in a large buffer. This works well with $s =~ m/(G.{187}G)/s. Sometimes I want to add an offset of N bytes to the search (I don't want to start at position 0 of the buffer). I now I can do $s =~ m/.{N}(G.{187}G)/s but this sounds to me like not very efficient as I don't want to parse at all the beginning buffer (and it might be big). I tried to work with \G but could not get it to right.

Thanks

Solution

I tested the speed of the two approaches:

use feature qw(say);
use strict;
use warnings;

use Benchmark qw(:all);
use Getopt::Long qw(GetOptions);

GetOptions( "case=i" => \my $case ) or die("Error in command line arguments\n");

my $num_cases = 5;
sub case1 { return (187, 2000) };
sub case2 { return (5, 10000) };
sub case3 { return (5, 20000) };
sub case4 { return (10000, 20000) };
sub case5 { return (5, 40000) };

my @cases = ( $case );
if ( !defined $case ) {
    @cases = 1..$num_cases;
}
for my $case ( @cases ) {
    run_case( $case );
}

sub run_case {
    my ( $case ) = @_;

    my $case_coderef = \&{"case" . $case};
    my ( $M, $N ) = $case_coderef->();
    say "Running case $case: \$M = $M, \$N = $N";

    my $prefix = 'A' x $N;
    my $middle = 'B' x $M;
    my $match_str = 'G' . $middle . 'G';

    my $str = $prefix . $match_str;

    my %methods = map {; "method$_" => \&{"method" . $_} } 1..6;

    for my $meth (keys %methods) {
        my $res = eval {
            $methods{$meth}->($str, $M, $N)
        };
        if ( $@ ) {
            print "$@";
            say "Skipping method '$meth'..";
            delete $methods{$meth};
            next;
        }   
        die "Method '$meth' failed.\n" if $res ne $match_str;
    }

    my %code = map { $_ => eval ('sub { $methods{' . $_ . '}->($str, $M, $N) }') } sort keys %methods; 

    cmpthese(-5, \%code );
}

sub method1 {
    my ( $str, $M, $N ) = @_;
    $str =~ m/.{$N}(G.{$M}G)/;
    return $1;
}

sub method2 {
    my ( $str, $M, $N ) = @_;
    pos( $str ) = $N;
    $str =~ m/\G(G.{$M}G)/;
    return $1;
}

sub method3 {
    my ( $str, $M, $N ) = @_;
    pos( $str ) = $N;
    $str =~ m/(G.{$M}G)/g;
    return $1;
}

sub method4 {
    my ( $str, $M, $N ) = @_;
    $str =~ m/.{$N}(G.{$M}G)/s;
    return $1;
}

sub method5 {
    my ( $str, $M, $N ) = @_;
    pos( $str ) = $N;
    $str =~ m/\G(G.{$M}G)/s;
    return $1;
}

sub method6 {
    my ( $str, $M, $N ) = @_;
    pos( $str ) = $N;
    $str =~ m/(G.{$M}G)/gs;
    return $1;
}

Output:

Running case 1: $M = 187, $N = 2000
             Rate method1 method3 method2 method6 method5 method4
method1  696485/s      --    -37%    -39%    -44%    -46%    -57%
method3 1112322/s     60%      --     -3%    -10%    -13%    -32%
method2 1146132/s     65%      3%      --     -7%    -10%    -30%
method6 1234678/s     77%     11%      8%      --     -3%    -24%
method5 1278898/s     84%     15%     12%      4%      --    -21%
method4 1629148/s    134%     46%     42%     32%     27%      --
Running case 2: $M = 5, $N = 10000
             Rate method1 method2 method6 method5 method3 method4
method1  226784/s      --    -72%    -72%    -72%    -72%    -78%
method2  801020/s    253%      --     -0%     -2%     -3%    -23%
method6  802386/s    254%      0%      --     -1%     -3%    -23%
method5  814132/s    259%      2%      1%      --     -1%    -22%
method3  823653/s    263%      3%      3%      1%      --    -21%
method4 1046605/s    361%     31%     30%     29%     27%      --
Running case 3: $M = 5, $N = 20000
             Rate method1 method3 method2 method6 method5 method4
method1  122763/s      --    -90%    -90%    -90%    -90%    -92%
method3 1252858/s    921%      --     -0%     -1%     -2%    -23%
method2 1258330/s    925%      0%      --     -1%     -1%    -22%
method6 1265165/s    931%      1%      1%      --     -1%    -22%
method5 1274309/s    938%      2%      1%      1%      --    -21%
method4 1622943/s   1222%     30%     29%     28%     27%      --
Running case 4: $M = 10000, $N = 20000
             Rate method1 method3 method2 method6 method5 method4
method1   90687/s      --    -62%    -62%    -93%    -93%    -94%
method3  236835/s    161%      --     -1%    -81%    -81%    -86%
method2  238334/s    163%      1%      --    -81%    -81%    -85%
method6 1236025/s   1263%    422%    419%      --     -3%    -25%
method5 1270943/s   1301%    437%    433%      3%      --    -22%
method4 1638548/s   1707%    592%    588%     33%     29%      --
Running case 5: $M = 5, $N = 40000
Quantifier in {,} bigger than 32766 in regex; marked by <-- HERE in m/.{ <-- HERE 40000}(G.{5}G)/ at ./p.pl line 83.
Skipping method 'method4'..
Quantifier in {,} bigger than 32766 in regex; marked by <-- HERE in m/.{ <-- HERE 40000}(G.{5}G)/ at ./p.pl line 63.
Skipping method 'method1'..
             Rate method2 method3 method6 method5
method2 1253528/s      --     -1%     -1%     -2%
method3 1260746/s      1%      --     -0%     -1%
method6 1263378/s      1%      0%      --     -1%
method5 1278718/s      2%      1%      1%      --

I ran this on my Laptop using Intel(R) Core(TM) i7-7500U CPU @ 2.70GHz running Ubuntu 16.10, the result shows that using the s modifier can speed up method1 dramatically ( compare with method4). But method1 or method4 cannot be used when $N exceeds 32766.