Search code examples
clinuxtail

wrong output with my own implementation of the linux tail function


I wrote a program that should print out the last n bytes of a file. It should be called like "./tail -n filename". Here is the full code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
int get_lines(char** argv);
int get_bytes(int lines, int fd);
int main(int argc, char* argv[])
{
    if (argc != 3) {
        printf("ungültige Anzahl an args\n");
        return EXIT_FAILURE;
    }
    int lines = get_lines(argv);
    if (lines == -1) return EXIT_FAILURE;
    // open file
    char* path = argv[2];
    int fd = open(path, O_RDONLY);
    if (fd == -1) {
        printf("file konnte nicht geöffnet werden oder existiert nicht\n");
        return EXIT_FAILURE;
    }
    // get stat of fiĺe
    struct stat infos;
    if (stat(path, &infos) == -1) {
        printf("stat failed\n");
        return EXIT_FAILURE;
    }
    // set offset to last byte
    if (lseek(fd, -1, SEEK_END) == -1) {
        printf("lseek 1 failed\n");
        return EXIT_FAILURE;
    }
    // determine number of bytes corresponding to number of lines
    int bytes = get_bytes(lines, fd);
    // printf("lines: %d\nbytes: %d\n", lines, bytes);
    if (bytes == -1) return EXIT_FAILURE;
    // set offset to beginning of tail and save tail of file in buffer tail
    char tail[bytes];
    if (lseek(fd, -bytes, SEEK_END) == -1) {
        printf("lseek 1 failed\n");
        return EXIT_FAILURE;
    }
    if (read(fd, tail, bytes) == -1){
        printf("read failed\n");
        return EXIT_FAILURE;
    }
    printf("%s\n", tail);
    return 0;
}

int get_lines(char** argv) {
    // cast string without - to int
    char* substr = &argv[1][1];
    int lines = atoi(substr);
    if (lines == 0) {
        printf("-n mit n = Anzahl Zeilen\n");
        return -1;
    }
    return lines;
}

int get_bytes(int lines, int fd) {
    int bytes = 0;
    char buff[1];
    while (1) {
        if (read(fd, buff, 1) == -1){
            printf("read failed\n");
            return -1;
        }
        if (buff[0] == '\n') {
            lines--;
            if (lines <= 0) break;
        }
        if (lseek(fd, -2, SEEK_CUR) == -1) {
            // n bytes was bigger than bytes of file
            break;
        }
        bytes++;
    }
    return bytes;
}

it works but some additional characters are printed out at the end. For example when I call the program with the source code of my tail implementation like "./tail -4 tail.c" i get outputs like:

        bytes++;
    }
    return bytes;
}�E0V

or

        bytes++;
    }
    return bytes;
}�z�U

so my guess is, i'm reading to much bytes in the tail variable at the end. But I can't find the bug. Am I counting the bytes incorrectly?


Solution

  • It seems you are reading bytes bytes into tail and passing tail to %s.

    This will lead to undefined behavior because %s requires a pointer to a string (null-terminated sequence of characters) while tail won't contain any terminating null-character, and therefore it will read out-of-bounds, seeking for terminating null-character.

    To overcome this issue, you can specify length to print.

    Try printf("%.*s\n", bytes, tail); instead of printf("%s\n", tail);.