Search code examples
Tags: sas, md5, command-line-interface, gnu-coreutils

Is it possible to replicate SAS md5 function output via GNU coreutils?


I expected this to be fairly straightforward, but I've run out of ideas this time. I'm working with GNU coreutils on Windows 7 (not that it should make any difference). I've found another command line utility that does what I want, but I'd prefer to find a way of doing this via GNU md5sum if possible.

Here's what I'm trying to reproduce:

/* Baseline: compute the MD5 digest of the 5-character string "Hello" inside
   SAS and print it as 32 hexadecimal characters. No trailing blank or
   newline is part of the hashed value here. */
data _null_;
    /* 32 characters: enough to hold the hex rendering produced by $hex32. */
    length a $32;
    /* md5() yields the digest; put(..., $hex32.) renders it as hex text */
    a = put(md5("Hello"), $hex32.);
    /* print the result in name=value form */
    put a=;
run;
/*Output to replicate: 8B1A9953C4611296A827ABF8C47804D7*/

Here's what I've tried so far:

/* wincmd: run an operating-system command line and echo every line of its
   output back through a DATA step PUT statement.

   Usage: %wincmd(<command line>);

   /parmbuff makes the text of the macro call's parameter list available in
   the automatic macro variable SYSPBUFF, so the command can contain commas,
   pipes, etc. without being split into parameters. Per the SAS
   documentation, SYSPBUFF includes the surrounding parentheses of the call.

   Because &SYSPBUFF is substituted inside a double-quoted SAS string below,
   any double quote the command needs must be written doubled ("") by the
   caller. */
%macro wincmd /parmbuff;
    /* Define fileref CMD as a pipe: reading from it runs the command. */
    filename cmd pipe "&SYSPBUFF" lrecl = 32767;
    data _null_;
        /* Read the command's output one record at a time. */
        infile cmd lrecl = 32767;
        /* Bare INPUT: load the record into the input buffer, create no variables. */
        input;
        /* Write the raw record (automatic variable _INFILE_) out. */
        put _infile_;
    run;
    /* Release the fileref so the next call can reassign it. */
    filename cmd clear;
%mend wincmd;

/* Path to GNU md5sum. It contains spaces, so every use below wraps it in
   doubled double quotes (""...""), which become single quotes once the text
   is substituted into the quoted pipe command inside %wincmd. */
%let MD5SUM = C:\Program Files (x86)\coreutils\bin\md5sum.exe;

/* Attempt 1: pipe the shell's echo into md5sum.
   Does NOT match SAS: echo sends extra white space and a newline along with
   the word, so md5sum hashes more than the 5 bytes "Hello". */
%wincmd(echo Hello | ""&MD5SUM"");
/*Output: f0d07a42adce73f0e4bc2d5e1cdb71e5 *- */

/* Attempt 2: same input, md5sum -t (text mode). Yields yet another digest.
   NOTE(review): presumably a line-ending translation effect - not confirmed
   here. */
%wincmd(echo Hello | ""&MD5SUM"" -t);
/*Output: adb3f07f896745a101145fc3c1c7b2ea *- */

/* Attempt 3: quote the word. Still does not match.
   NOTE(review): presumably the shell's echo passes the quote characters
   through as data, so they are hashed too - verify. */
%wincmd(echo ""Hello"" | ""&MD5SUM"");
/*Output: 2c3a70806465ad43c09fd387e659fbce *- */

/* A second, unrelated command-line tool (md5.exe) used as a cross-check. */
%let MD5 = C:\Program Files (x86)\md5\md5.exe;

/* Same stdin input as attempt 1: same digest, so the two tools agree and
   the difference from SAS lies in the input bytes, not in the hashing. */
%wincmd(echo Hello | ""&MD5"");
/*Output: F0D07A42ADCE73F0E4BC2D5E1CDB71E5 (matches md5sum)*/

/* Same stdin input as attempt 3: again agrees with md5sum. */
%wincmd(echo ""Hello"" | ""&MD5"");
/*Output: 2C3A70806465AD43C09FD387E659FBCE (matches md5sum)*/

/* Pass the string as a command-line argument (-d) instead of via stdin:
   no echo is involved, and the digest matches the SAS md5() result. */
%wincmd(""&MD5"" -d""Hello"");
/*Output: 8B1A9953C4611296A827ABF8C47804D7  (matches SAS!)*/

Is there some form of syntax I can use with md5sum that will result in the same output (except possibly for upper/lower case differences) as SAS and md5 -d ? And why does the same string produce a different MD5 hash when read from stdin rather than as a command line parameter?

Update: fix, as suggested by DomPazz and Rob:

I thought I might as well go all in with coreutils at this point and match the SAS output exactly:

/* Use the GNU coreutils executables explicitly (full paths) rather than the
   shell's built-in echo. Relies on %wincmd and &MD5SUM defined earlier. */
%let GNUPATH = C:\Program Files (x86)\coreutils\bin;
%let ECHO = &GNUPATH\echo.exe;
%let TR = &GNUPATH\tr.exe;
%let CUT = &GNUPATH\cut.exe;

/* Pipeline stages:
     echo -n "Hello"     emit the word with no trailing newline
     md5sum              hash stdin; prints "<digest> *-"
     tr '[a-f]' '[A-F]'  upper-case the hex letters to match $hex32. output
     cut -f 1 -d " "     keep only the first space-delimited field (the digest) */
%wincmd(""&ECHO"" -n ""Hello"" | ""&MD5SUM"" | ""&TR"" '[a-f]' '[A-F]' | ""&CUT"" -f 1 -d "" "");
/*Output: 8B1A9953C4611296A827ABF8C47804D7*/

Solution

  • Your problem is not in md5sum, but in echo. It is adding white space to the "Hello" string.

    Verify

    C:\>echo Hello > c:\temp\test.txt
    C:\>md5sum c:\temp\test.txt
    

    -- I get: f0d07a42adce73f0e4bc2d5e1cdb71e5

    Now open the file and notice the white space and a newline. Delete those, then run:

    C:\>md5sum c:\temp\test.txt
    

    -- I get 8b1a9953c4611296a827abf8c47804d7, which matches SAS.

    EDIT: As mentioned in the comments below, GNU echo has the -n option, which suppresses the trailing newline, so only the string itself is hashed.

    C:\Cygwin\bin>echo.exe -n Hello | md5sum.exe
    

    returns: 8b1a9953c4611296a827abf8c47804d7 which matches the SAS value.