Search code examples
cposixfile-read

Reading file with POSIX read


I have a file which is dumped by the following code:

for (unsigned long long i = 0; i < 10; i++) {
  unsigned char byte = rand() % 16; 
  printf("%02x", byte);
}

I can read this file using fscanf:

// Destination for the decoded bytes.
// NOTE(review): this excerpt never points buf at any storage; it must be
// allocated (and be large enough for every decoded byte) before the loop
// below writes through it — confirm against the full program.
uint8_t *buf;
// Number of bytes decoded so far; also the next write position in buf.
uint64_t index = 0;
// The %x conversion stores into an unsigned int, so a temporary is used and
// narrowed to uint8_t afterwards.
unsigned int byte;
// Each iteration converts at most two hex digits (field width 2) into one
// value. The loop ends on the first conversion that does not succeed, which
// includes end of input.
while (fscanf(input_file, "%02x", &byte) == 1) {
  buf[index] = static_cast<uint8_t>(byte);
  index++;
}

How do I use the POSIX read function instead of fscanf to read the values into buf?


Solution

  • Well... let me just start by saying that working with raw syscalls is not that simple. You have to take into account a variety of problems and edge cases that the normal stdio functions take care of without you even noticing (for example, read may return fewer bytes than you asked for). Carefully read the manual page for read before writing the program.

    With the above being said, you can achieve what you want by creating a simple map that maps every ASCII character to its hexadecimal value. In the example below, I also use 0x0 for characters that are not valid hex digits (which makes them indistinguishable from '0'), but you could for example declare the map as int8_t and fill invalid entries with -1 so that bad input can be detected. That's up to you.

    Once you have a map that translates every ASCII character to the given hex value, you can use it to simply look up the needed values; for example, hexmap['a'] and hexmap['A'] are both 0xA. You can obtain a single uint8_t by reading two hex digits and then combining them with a simple left shift and bitwise OR, like this:

    uint8_t value = 0xA << 4 | 0x1;
    // Using the map:
    uint8_t value = hexmap['a'] << 4 | hexmap['1'];
    // value == 0xA1
    

    Here's a working example:

    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>
    #include <unistd.h>
    #include <fcntl.h>
    #include <sys/stat.h>
    #include <sys/types.h>
    
    /*
     * Lookup table from a character code to the numeric value of the hex digit
     * it represents ('0'-'9', 'a'-'f', 'A'-'F').
     *
     * Every entry that is not listed is zero-initialised, so characters that
     * are not hex digits map to 0x0 and cannot be told apart from '0' by the
     * table alone.
     *
     * The table has 256 entries so that any unsigned char value is a valid
     * index; a 128-entry table would be read out of bounds for bytes >= 0x80.
     */
    static const uint8_t hexmap[256] = {
        ['0'] = 0x0, ['1'] = 0x1, ['2'] = 0x2, ['3'] = 0x3, ['4'] = 0x4,
        ['5'] = 0x5, ['6'] = 0x6, ['7'] = 0x7, ['8'] = 0x8, ['9'] = 0x9,
        ['A'] = 0xA, ['B'] = 0xB, ['C'] = 0xC, ['D'] = 0xD, ['E'] = 0xE, ['F'] = 0xF,
        ['a'] = 0xA, ['b'] = 0xB, ['c'] = 0xC, ['d'] = 0xD, ['e'] = 0xE, ['f'] = 0xF,
    };
    
    /*
     * Returns the value (0-15) of the ASCII hex digit c, or -1 when c is not a
     * hex digit. The range check comes first so that hexmap is only ever
     * indexed with a valid digit character.
     */
    static int hex_value(unsigned char c) {
        if ((c >= '0' && c <= '9') ||
            (c >= 'a' && c <= 'f') ||
            (c >= 'A' && c <= 'F')) {
            return hexmap[c];
        }
        return -1;
    }

    /* Returns non-zero for the whitespace characters that the decoder skips. */
    static int is_skippable(unsigned char c) {
        return c == ' ' || c == '\t' || c == '\n' || c == '\r';
    }

    /*
     * Reads "your_file" with read(2), decodes every pair of hex digits into one
     * byte, and prints the decoded bytes as "index: value" lines.
     *
     * Whitespace in the input (for example a trailing newline) is skipped.
     * Any other non-hex character, or an odd number of hex digits, is reported
     * on stderr and treated as an error.
     *
     * Returns 0 on success and 1 on any failure. The file descriptor and the
     * buffer are released on every path once the file has been opened.
     */
    int main(void) {
        enum { INITIAL_CAPACITY = 16, CHUNK_SIZE = 4096 };

        int status = 1;
        uint8_t *buf = NULL;
        size_t capacity = INITIAL_CAPACITY;
        size_t total = 0;
        unsigned char chunk[CHUNK_SIZE];
        int high = -1; // Pending high nibble, or -1 when none is pending.
        int fd;

        // Open file.

        fd = open("your_file", O_RDONLY);
        if (fd == -1) {
            perror("open failed");
            return 1;
        }

        // Allocate buffer. It is grown on demand while reading.

        buf = malloc(capacity * sizeof *buf);
        if (buf == NULL) {
            perror("malloc failed");
            goto out;
        }

        for (;;) {
            // read() may return fewer bytes than requested, so digits are
            // consumed one at a time and a pair may span two read() calls.
            ssize_t nread = read(fd, chunk, sizeof chunk);

            if (nread == -1) {
                perror("read failed");
                goto out;
            }
            if (nread == 0) {
                // EOF reached
                break;
            }

            for (ssize_t i = 0; i < nread; i++) {
                unsigned char c = chunk[i];
                int nibble;

                if (is_skippable(c)) {
                    continue;
                }

                nibble = hex_value(c);
                if (nibble < 0) {
                    fprintf(stderr, "invalid character 0x%02x in input\n",
                            (unsigned int)c);
                    goto out;
                }

                if (high < 0) {
                    // First digit of a pair: remember it and wait for the second.
                    high = nibble;
                    continue;
                }

                if (total == capacity) {
                    uint8_t *grown;

                    if (capacity > SIZE_MAX / 2) {
                        fprintf(stderr, "input too large\n");
                        goto out;
                    }
                    // Keep buf valid if realloc fails so it can still be freed.
                    grown = realloc(buf, capacity * 2);
                    if (grown == NULL) {
                        perror("realloc failed");
                        goto out;
                    }
                    buf = grown;
                    capacity *= 2;
                }

                buf[total] = (uint8_t)((high << 4) | nibble);
                total++;
                high = -1;
            }
        }

        if (high >= 0) {
            fprintf(stderr, "odd number of hex digits in input\n");
            goto out;
        }

        // Do whatever you want...
        // For example, print the values.
        for (size_t i = 0; i < total; i++) {
            // %zu is the conversion for size_t; %d would not match the argument.
            printf("%zu: %hhu\n", i, buf[i]);
        }

        status = 0;

    out:
        free(buf);
        close(fd);

        return status;
    }
    

    Sample input:

    0a01ff
    

    Output:

    0: 10
    1: 1
    2: 255