Search code examples
chexdiffhexdumpxxd

Could a simple C program mimic the default 'xxd' command such that its diff'd output would return 0?


I'm trying to write a C executable that will yield identical output to that of a default xxd command. For example, let's say I have a fairly small text file named test.txt and an executable named myxxd.

So, I first make a benchmark for comparison by using:

$ touch correct-xxdoutput.txt test-output.txt
$ xxd test.txt > correct-xxdoutput.txt

Then using my executable for the same operation but to a different output file:

$ ./myxxd test.txt > test-output.txt
$ diff correct-xxdoutput.txt test-output.txt
$

I've got pretty close with some guesswork, but my formatting is always wrong somehow, and I don't really understand specifically how xxd generates hexDumps. Feels like I just took a totally wrong approach here, but maybe the task is just out of my potential with my current level of C knowledge.

My code (see also: https://pastebin.com/Vjkm8Wb4):

#include<stdio.h>
#include<stdlib.h>
#include<string.h>

#define SIZE 256

//Prototypes
void hexDump(void*, int);

/*
 * Entry point of the asker's program: opens the file named by argv[1] in
 * binary mode and passes it to hexDump() in chunks of at most SIZE bytes.
 *
 * NOTE: argc is never inspected and the result of fopen() is never checked,
 * so a missing argument or a file that cannot be opened is not detected.
 */
int main(int argc, char *argv[])
{
    //Create and open filestream
    FILE *myfile;
    myfile =fopen(argv[1],"rb");

    // Read until a short read signals the end of the input.
    for ( ; ; )
    {
        unsigned char buffer[SIZE];
        size_t n = fread(buffer, 1, SIZE, myfile);

        // Only the chunk and its length are passed on; hexDump() is not told
        // how many bytes preceded this chunk in the file.
        if (n > 0)
            hexDump(buffer, n);
        // fread() returned fewer bytes than requested: stop reading.
        if (n < SIZE)
            break;
    }

    fclose(myfile);
    return 0;
}


/*
 * Prints len bytes starting at addr as hex-dump rows of up to 16 bytes each:
 * an 8-digit hex index, the bytes in hex with a space after every second
 * byte, and the same bytes as text with non-printable values shown as '.'.
 *
 * addr: start of the data to dump.
 * len:  number of bytes to dump.
 *
 * NOTE: the index printed at the start of each row is the position within
 * this buffer (i), not a position within the file being dumped.
 * NOTE: if len is 0 the loop body never runs and the final printf() reads
 * bufferLine before anything has been stored in it.
 */
void hexDump (void *addr, int len)
{
    int i;
    unsigned char bufferLine[17]; // text column for the current row, plus '\0'
    unsigned char *pc = (unsigned char*)addr;

    for (i = 0; i < len; i++)
    {
        // Start of a new 16-byte row.
        if ((i % 16) == 0)
        {
            // Finish the previous row by appending its text column.
            if (i != 0)
                printf (" %s\n", bufferLine);

            // Terminates the whole program with status 0 when the first byte
            // of a row is zero.
            if (pc[i] == 0x00) exit(0);
            printf ("%08x: ", i);
        }

        // Print the two-digit hex code of this byte, followed by a space
        // after every second byte.
        printf ("%02x", pc[i]);
        if ((i % 2) == 1)
            printf (" ");

        // Bytes outside 0x20..0x7e are shown as '.' in the text column.
        if ((pc[i] < 0x20) || (pc[i] > 0x7e))
        {
            bufferLine[i % 16] = '.';
        }

        else
        {
           bufferLine[i % 16] = pc[i];
        }    

        bufferLine[(i % 16) + 1] = '\0'; // Keep the text column terminated after the last stored character
    }

    // Pad a short final row with two spaces per missing byte; no separator
    // spaces between byte pairs are emitted here.
    while ((i % 16) != 0)
    {
        printf ("  ");
        i++;
    }

    // Text column of the last row, preceded by five spaces.
    printf ("     %s\n", bufferLine);
}

Solution

  • There are multiple issues with your code, including:

    • You don't check that you have a file name to open.
    • You don't check that you opened the file that was named.
    • You don't have a mechanism to handle the offset into the output, so the addresses at the start of the lines after the first block are wrong.
    • Your code tests for a zero byte and exits silently when it encounters one. This is bad — twice. Once because a program that is meant to handle binary data must handle zero bytes as well as values from 1..255; and once because exiting silently (and claiming success with exit(0) to boot) is bad. You should report the problem (on standard error, not standard output) and exit with an error status — a non-zero status.

    The core formatting seems to be mostly OK; there is a problem with padding the short line of data at the end of the file, too.

    I came up with this code, which is closely based on yours (but reformatted to suit at least some of my style prejudices — but my style is not far removed from yours most of the time):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    #define SIZE 256
    
    void hexDump(size_t, void *, int);
    
    /*
     * Hex-dumps the single file named on the command line to standard output.
     *
     * The file is read in chunks of up to SIZE bytes; each chunk is handed to
     * hexDump() together with the number of bytes that preceded it, so the
     * address column keeps counting across chunks.
     *
     * Returns 0 on success. Exits with EXIT_FAILURE (after a message on
     * standard error) when the argument count is wrong, the file cannot be
     * opened, or a read error occurs.
     */
    int main(int argc, char *argv[])
    {
        if (argc != 2)
        {
            fprintf(stderr, "Usage: %s file\n", argv[0]);
            exit(EXIT_FAILURE);
        }
        FILE *myfile = fopen(argv[1], "rb");
        if (myfile == NULL)
        {
            fprintf(stderr, "%s: failed to open file '%s' for reading\n", argv[0], argv[1]);
            exit(EXIT_FAILURE);
        }

        unsigned char buffer[SIZE];
        size_t n;
        size_t offset = 0;
        /* fread() returns 0 at end of file and on error; ferror() below
         * tells the two apart. */
        while ((n = fread(buffer, 1, SIZE, myfile)) > 0)
        {
            /* n <= SIZE, so the conversion to int cannot overflow. */
            hexDump(offset, buffer, (int)n);
            offset += n;
        }

        /* A read error must not be reported as a successful dump. */
        if (ferror(myfile))
        {
            fprintf(stderr, "%s: error while reading file '%s'\n", argv[0], argv[1]);
            fclose(myfile);
            exit(EXIT_FAILURE);
        }

        fclose(myfile);
        return 0;
    }
    
    /*
     * Prints len bytes starting at addr to standard output as hex-dump rows
     * of up to 16 bytes each.
     *
     * Every row consists of:
     *   - the position of its first byte as 8 hex digits followed by ": ",
     *   - the bytes as two hex digits each, with a space after every second
     *     byte (missing bytes of a short last row are padded with spaces so
     *     the next column stays aligned),
     *   - one more space and the bytes as text, where values outside
     *     0x20..0x7e are shown as '.'.
     *
     * offset: position printed for the first byte of addr; later rows
     *         continue counting from it in steps of 16.
     * addr:   start of the data to dump (not modified).
     * len:    number of bytes to dump; nothing is printed when len <= 0 or
     *         addr is NULL.
     */
    void hexDump(size_t offset, void *addr, int len)
    {
        enum { BYTES_PER_ROW = 16 };
        const unsigned char *pc = (const unsigned char *)addr;

        /* Without any data there is no row to print (and no text column
         * that could legitimately be shown). */
        if (pc == NULL || len <= 0)
            return;

        for (int start = 0; start < len; start += BYTES_PER_ROW)
        {
            int remaining = len - start;
            int count = (remaining < BYTES_PER_ROW) ? remaining : BYTES_PER_ROW;
            char text[BYTES_PER_ROW + 1];

            printf("%08zx: ", offset + (size_t)start);

            for (int col = 0; col < BYTES_PER_ROW; col++)
            {
                if (col < count)
                {
                    unsigned char byte = pc[start + col];
                    printf("%02x", (unsigned)byte);
                    text[col] = (byte < 0x20 || byte > 0x7e) ? '.' : (char)byte;
                }
                else
                {
                    /* Placeholder keeping a short last row aligned. */
                    fputs("  ", stdout);
                }

                /* Separator after every pair of byte positions. */
                if (col % 2 == 1)
                    putchar(' ');
            }

            text[count] = '\0';
            printf(" %s\n", text);
        }
    }
    

    When run on your original source code and compared with the output from the system xxd, there are no differences. I also checked it against a file with just 16 characters (abcdefghijklmno plus a newline); the output was the same there too. And I checked it on its own binary — and found and fixed the zero byte and unannounced early exit problem.