Search code examples
cxxd

hexdump output vs xxd output


I'm trying to create a hexdump like xxd, but there are some differences that I'm trying to resolve. Currently the program processes 10 characters per line, as seen in the rightmost column, versus 16 in xxd. It also shows only 1 octet per column instead of pairs of 2 octets.

xxd

 0000000: 2369 6e63 6c75 6465 203c 7374 6469 6f2e  #include <stdio.

my output

 0:  23 69 6E 63 6C 75 64 65 20 3C  #include <     

EDIT:

To add some clarification, I am trying to achieve two things. 1) I would like this program to produce exactly the same output as xxd. For this it needs to output 32 hex digits per line (8 columns of 4). 2) I would also like the program to group the hex digits in each row into columns of 4, as xxd does.

I've tried changing the "10" in the source below to something like "12", but that produces errors in the output; the 10 seems to be a magic number.

source:

 #include <stdio.h>    
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
 #include <ctype.h>

 #define BYTE_OFFSET_INIT 8
 #define CHAR_OFFSET_INT  39
 #define LINE_LENGTH 50

 /*
  * Write a diagnostic of the form "<program>: <file>: <message>" to stderr,
  * where <message> is the strerror() text for the error number e.
  */
 static void print_e(int e, char *program, char *file)
 {
   const char *reason = strerror(e);

   fprintf(stderr, "%s: %s: %s\n", program, file, reason);
 }
 /*
  * Print one assembled row followed by a newline.
  *
  * The row is filled in piecewise with sprintf, which leaves '\0' bytes
  * inside the hex area.  Any terminator found between BYTE_OFFSET_INIT and
  * CHAR_OFFSET_INT is turned into a blank first, so that printf does not
  * stop before the text column.  The buffer is modified in place.
  */
 static void print_line(char *line)
 {
   char *cursor = line + BYTE_OFFSET_INIT;
   char *const hex_end = line + CHAR_OFFSET_INT;

   while (cursor < hex_end) {
     if (*cursor == '\0')
       *cursor = ' ';
     cursor++;
   }
   printf("%s\n", line);
 }

 /*
  * Dump the file named by argv[1], ten bytes per row.
  *
  * Every row is assembled in `line` as a right-aligned decimal offset, the
  * bytes as two-digit upper-case hex values, and the same bytes as text
  * (bytes for which isprint() is false are shown as '.').  The finished row
  * is handed to print_line().
  *
  * Returns 0 on success.  Exits with EXIT_FAILURE on a usage error or when
  * opening, reading or closing the file fails.
  */
 int main(int argc, char *argv[])
 {
   /*
    * Bytes shown per row.  CHAR_OFFSET_INT (8 + 3*10 + 1) and LINE_LENGTH
    * are sized for exactly this many bytes, so all three values have to be
    * changed together.
    */
   enum { BYTES_PER_LINE = 10 };

   char line[LINE_LENGTH + 1];

   int ch;
   int character = 0;
   int line_offset = 0;
   int byte_offset = BYTE_OFFSET_INIT, char_offset = CHAR_OFFSET_INT;

   if (argc != 2) {
     fprintf(stderr, "Usage: %s [file]\n", argv[0]);
     exit(EXIT_FAILURE);
   }
   FILE *fp = fopen(argv[1], "rb");
   if (!fp) {
     print_e(errno, argv[0], argv[1]);
     exit(EXIT_FAILURE);
   }

   printf("Offset              Bytes              Characters\n");
   printf("------  -----------------------------  ----------\n");

   while ((ch = fgetc(fp)) != EOF) {
     if (character == 0) {
       /*
        * Start every row from a blank buffer.  Without this a short final
        * row would still show the previous row's hex digits (or, for a
        * file shorter than one row, uninitialised memory) in the unused
        * part of the hex area.
        */
       memset(line, ' ', LINE_LENGTH);
       line[LINE_LENGTH] = '\0';
       snprintf(line, sizeof line, "%6d  ", line_offset);
       line_offset += BYTES_PER_LINE;
     }
     /* The terminator written here lands inside the hex area; print_line()
        turns it back into a blank. */
     snprintf(line + byte_offset, sizeof line - (size_t)byte_offset,
              "%02X ", (unsigned)ch);
     /* The text column is kept terminated so the row ends after the last
        character that was actually read. */
     line[char_offset] = isprint(ch) ? (char)ch : '.';
     line[char_offset + 1] = '\0';
     character++;
     char_offset++;
     byte_offset += 3;

     if (character == BYTES_PER_LINE) {
       print_line(line);
       character = 0;
       char_offset = CHAR_OFFSET_INT;
       byte_offset = BYTE_OFFSET_INIT;
     }
   }
   if (ferror(fp)) {
     print_e(errno, argv[0], argv[1]);
     exit(EXIT_FAILURE);
   }

   /* Flush a final row that holds fewer than BYTES_PER_LINE bytes. */
   if (character > 0)
     print_line(line);

   if (fclose(fp) == EOF) {
     print_e(errno, argv[0], argv[1]);
     exit(EXIT_FAILURE);
   }
   return 0;
 }

Solution

  • While it's possible to scan one byte at a time and write it into the output string at the correct position, it is by no means necessary. It is far easier to read DISPLAY_LENGTH bytes at once and loop twice over the bytes that were read: first to output the hex representation, then again for the ASCII characters. The only (minor) caveat is what to do at the end of the file; but since fread returns the number of bytes it actually read, you can just keep on counting and output spaces for as long as necessary to fill the hex line.

    This leads to the following program. DISPLAY_LENGTH is the total number of bytes to display per line, GROUP_BYTES is the number of single bytes in each hexadecimal group (setting it to 1 will display a 'regular' spaced hex output, 2 will group as in your xxd example, and higher values should also work).

    I had some fun figuring out the magic formulae to correctly center the text Bytes and calculating how many dashes to display for the separator. The rest is very straightforward.

    I don't know what xxd output looks like, apart from your one-line example, so I use stat to read out the length of the file in advance (with an added opportunity to display an error for "not a file" -- try for example with a folder) and display the correct number of dashes and spaces to line up the line counter. I set this value to a minimum of 6 so there is always room for the text Offset.

    If your compiler is not a modern one, it may complain about the %zu format string. If so, use %lu; you may also need to change all occurrences of size_t to unsigned long.

    #include <stdio.h>    
    #include <stdlib.h>
    #include <sys/stat.h>
    #include <string.h>
    #include <errno.h>
    #include <ctype.h>
    
    /* Number of bytes read from the file and shown on each output row. */
    #define DISPLAY_LENGTH  21
    /* Number of bytes per hex group; a blank is printed before every group. */
    #define GROUP_BYTES     2
    
    /*
     * Write a diagnostic of the form "<program>: <file>: <message>" to stderr,
     * where <message> is the strerror() text for the error number e.
     */
    static void print_e(int e, char *program, char *file)
    {
        const char *reason = strerror(e);

        fprintf(stderr, "%s: %s: %s\n", program, file, reason);
    }
    
    /*
     * Hex-dump the regular file named by argv[1].
     *
     * Output is a two-line header followed by one row per DISPLAY_LENGTH
     * bytes: the decimal offset of the row, the bytes as upper-case hex in
     * groups of GROUP_BYTES, and the bytes as text (bytes for which
     * isprint() is false are shown as '.').  A short final row is padded
     * with blanks so the text column stays aligned.
     *
     * Returns 0 on success.  Exits with EXIT_FAILURE on a usage error, when
     * the argument is not a regular file, or when opening, stat-ing, reading
     * or closing the file fails.
     */
    int main(int argc, char *argv[])
    {
        /*
         * Width of the hex area counting the blank in front of every group:
         * two digits per byte plus one blank per (possibly partial) group.
         */
        enum { HEX_WIDTH = 2*DISPLAY_LENGTH + (DISPLAY_LENGTH+GROUP_BYTES-1)/GROUP_BYTES };
        /* Length of the "Bytes" column title, used to centre it. */
        const int title_len = (int)(sizeof "Bytes" - 1);

        struct stat fs;
        int n_digit = 0;
        int col;
        long long size_left;
        unsigned char read_buf[DISPLAY_LENGTH];
        size_t i, bytes_read, cpos = 0;
        FILE *fp;

        if (argc != 2)
        {
            fprintf(stderr, "Usage: %s [file]\n", argv[0]);
            exit(EXIT_FAILURE);
        }

        fp = fopen(argv[1], "rb");
        if (!fp)
        {
            print_e (errno, argv[0], argv[1]);
            exit(EXIT_FAILURE);
        }

        if (stat(argv[1], &fs) == -1)
        {
            print_e (errno, argv[0], argv[1]);
            fclose(fp);
            exit(EXIT_FAILURE);
        }

        /* S_ISREG is the base POSIX test; the S_IFMT/S_IFREG masks are only
           guaranteed on XSI systems. */
        if (!S_ISREG(fs.st_mode))
        {
            fprintf(stderr, "Not a regular file: %s\n", argv[1]);
            fclose(fp);
            exit(EXIT_FAILURE);
        }

        /* The offset column is as wide as the decimal file size, but never
           narrower than the 6 characters of its "Offset" title. */
        for (size_left = fs.st_size; size_left > 0; size_left /= 10)
            n_digit++;
        if (n_digit < 6)
            n_digit = 6;

        /* Header line: "Bytes" is centred over the hex area. */
        printf("%*s  ", n_digit, "Offset");
        printf("%*s%-*s", (HEX_WIDTH+2)/2, "Bytes", (HEX_WIDTH+2-title_len)/2, "");
        printf ("  Characters\n");

        /* Separator line: one run of dashes per column. */
        for (col = 0; col < n_digit; col++)
            putchar ('-');
        printf ("  ");
        /* Starts at 1 because the first group's leading blank is part of
           the two-blank column gap, not of the hex area itself. */
        for (col = 1; col < HEX_WIDTH; col++)
            putchar ('-');
        printf ("  ");
        for (col = 0; col < DISPLAY_LENGTH; col++)
            putchar ('-');
        putchar ('\n');

        while ((bytes_read = fread (read_buf, 1, DISPLAY_LENGTH, fp)) > 0)
        {
            printf ("%*zu ", n_digit, cpos);

            /* Hex area: always DISPLAY_LENGTH cells wide; cells past the
               bytes actually read are left blank. */
            for (i = 0; i < DISPLAY_LENGTH; i++)
            {
                if (i % GROUP_BYTES == 0)
                    putchar (' ');
                if (i < bytes_read)
                    printf ("%02X", read_buf[i]);
                else
                    printf ("  ");
            }

            printf ("  ");

            /* Text area: only the bytes that were read. */
            for (i = 0; i < bytes_read; i++)
                putchar (isprint(read_buf[i]) ? read_buf[i] : '.');

            putchar ('\n');

            cpos += bytes_read;
        }
        if (ferror(fp))
        {
            print_e (errno, argv[0], argv[1]);
            fclose(fp);
            exit(EXIT_FAILURE);
        }

        if (fclose(fp))
        {
            print_e (errno, argv[0], argv[1]);
            exit(EXIT_FAILURE);
        }
        return 0;
    }
    

    Sample output, displaying its own compiled executable with a display length of 21 and grouped per 2 bytes:

    Offset                        Bytes                           Characters
    ------  ----------------------------------------------------  ---------------------
         0  CFFA EDFE 0700 0001 0300 0080 0200 0000 0D00 0000 70  ....................p
        21  0600 0085 0020 0000 0000 0019 0000 0048 0000 005F 5F  ..... .........H...__
        42  5041 4745 5A45 524F 0000 0000 0000 0000 0000 0000 00  PAGEZERO.............
    ... (673 very similar lines omitted) ...
     14196  7075 7473 005F 7374 6174 2449 4E4F 4445 3634 005F 73  puts._stat$INODE64._s
     14217  7472 6572 726F 7200 6479 6C64 5F73 7475 625F 6269 6E  trerror.dyld_stub_bin
     14238  6465 7200 0000                                        der...