Search code examples
regex perl recursive-regex

Renamespacing a C++ header with Perl - multiline issues


I am trying to enclose the namespace of a C++ header-only library in a custom namespace to avoid symbol name collisions with customer code.

Here is the Perl script I wrote. It should enclose the def namespace in the abc namespace.

#!/usr/bin/perl

# Intended to wrap every `namespace def { ... }` block of the input in an
# enclosing `namespace abc { ... }` and print the result.
#
# NOTE: `while (<>)` puts one input line at a time into $_, so the
# substitution below only ever sees a single line. A block whose closing
# brace is on a later line can therefore never match. The s flag only
# changes what "." matches; it does not make more input available.
while (<>) { 
    # Group 1 recurses into itself via (?1) to match nested, balanced braces;
    # the replacement re-emits the matched block inside `namespace abc`.
    s/namespace\s+def\s*
    ( # group 1 - braced string with balanced number of lbraces and rbraces
        \{
        ( # group 2 - String without braces or recursion to group 1
            [^\{\}]* # some string that does not contain braces
            | (?1) # recursion of group1
        )*
        \}
    )
    /namespace abc {\nnamespace def \1\n}/gcsx;
    print;
}

And here is the test file I am using:

namespace def {}
namespace def { abc }
namespace def { { } }
namespace def { { abcd } }
namespace def {
    abc
}
namespace def { // some comment
    do_something();
}
namespace def {
    do_something();
    while (still_not_crashed()) {
        do_even_more();
    }
}

However, running the script on the file only yields:

namespace abc {
namespace def {}
}
namespace abc {
namespace def { abc }
}
namespace abc {
namespace def { { } }
}
namespace abc {
namespace def { { abcd } }
}
namespace def {
    abc
}
namespace def { // some comment
    do_something();
}
namespace def {
    do_something();
    while (still_not_crashed()) {
        do_even_more();
    }
}

The one-line namespaces work just fine, but as soon as the text that should be matched contains a newline it stops working, even though I added the s modifier to my regex.

What am I doing wrong?


Solution

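  • You have to read the whole file into a string and then do the substitution on that string. `while (<>)` hands the regex one line at a time, so a block that spans several lines is never seen in full; the s modifier only changes what `.` matches and does not make more input available: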

    #!/usr/bin/perl
    use strict;
    use warnings;

    # wrap_namespace($source)
    #
    # Returns a copy of $source in which every `namespace def { ... }` block
    # is enclosed in an outer `namespace abc { ... }`. Blocks may span any
    # number of lines and may contain nested, balanced braces. Text that has
    # no complete (balanced) `namespace def` block is returned unchanged.
    #
    # Limitation: every brace character is counted, including braces that
    # appear inside comments or string literals of the processed text.
    sub wrap_namespace {
        my ($source) = @_;

        # No s flag is needed: the pattern contains no ".", and the negated
        # character class already matches newlines.
        $source =~ s/\b namespace \s+ def \s*
            (                   # group 1: one brace-balanced block
                \{
                (?:
                    [^{}]++     # a run of non-brace characters (possessive)
                  | (?1)        # or a nested block: recurse into group 1
                )*+             # possessive: never re-split a balanced body
                \}
            )
            /namespace abc {\nnamespace def $1\n}/gx;

        return $source;
    }

    # Slurp the whole input so that blocks spanning several lines are visible
    # to the regex at once. `local` confines the change of the input record
    # separator to this do-block instead of altering it for the whole program.
    my $header = do { local $/; <DATA> };
    print wrap_namespace($header) if defined $header;

    
    __DATA__
    namespace def {}
    namespace def { abc }
    namespace def { { } }
    namespace def { { abcd } }
    namespace def {
        abc
    }
    namespace def { // some comment
        do_something();
    }
    namespace def {
        do_something();
        while (still_not_crashed()) {
            do_even_more();
        }
    }
    

    Output:

    namespace abc {
    namespace def {}
    }
    namespace abc {
    namespace def { abc }
    }
    namespace abc {
    namespace def { { } }
    }
    namespace abc {
    namespace def { { abcd } }
    }
    namespace abc {
    namespace def {
        abc
    }
    }
    namespace abc {
    namespace def { // some comment
        do_something();
    }
    }
    namespace abc {
    namespace def {
        do_something();
        while (still_not_crashed()) {
            do_even_more();
        }
    }