Search code examples
cfilemmap

C mmap implementation for linux command


I have the following code, which basically reproduces the functionality of the wc command in Linux. My question is: how can I rewrite it using mmap? I know I can use struct stat sb; and then char *file_in_memory = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0); but I can't get it to work, and I don't know how to use it correctly in place of the loop while ((n = read(file, buffer, LUNG_BUF - 1)) > 0). In my attempts, the program only displays values of 0.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>

#define LUNG_BUF 4096

/*
 * Minimal `wc` clone based on read().
 *
 * Usage: <prog> <file>
 * Prints "<lines> <words> <bytes> <file>", each number right-aligned to the
 * width of the largest of the three counts. A word is a maximal run of bytes
 * other than space, tab and newline; a line is counted for every '\n'.
 *
 * Returns 0 on success and 1 if the argument count is wrong, the file cannot
 * be opened, or a read fails.
 */
int main(int argc, char** argv)
{
  if ( argc != 2 )
  {
    printf( "No file name\n%s", argv[0]);
    return 1;
  }

  char *thefile = argv[1];
  int file = open(thefile, O_RDONLY);
  if (file < 0)
  {
    printf("can not open :%s\n", thefile);
    return 1;
  }

  // long long so that files larger than INT_MAX bytes do not overflow
  long long bytes = 0;
  long long words = 0;
  long long newLine = 0;

  enum states { WHITESPACE, WORD };
  int state = WHITESPACE;   // carried across chunks: a word may span two reads

  char buffer[LUNG_BUF];
  // read() returns ssize_t; keeping it signed lets the -1 error value end
  // the loop instead of being mistaken for a huge byte count.
  ssize_t n;

  while ((n = read(file, buffer, sizeof buffer)) > 0)
  {
    bytes += n;

    // Walk exactly the n bytes that were read rather than scanning for a
    // terminator, so NUL bytes inside the file are counted as well.
    for (ssize_t i = 0; i < n; i++)
    {
      char c = buffer[i];

      if (c == '\n')
      {
        newLine++;
      }

      if (c == ' ' || c == '\t' || c == '\n')
      {
        state = WHITESPACE;
      }
      else
      {
        if (state == WHITESPACE)
        {
          words++;          // first byte of a new word
        }
        state = WORD;
      }
    }
  }

  if (n < 0)
  {
    perror("read");
    close(file);
    return 1;
  }
  close(file);

  // find out the largest value of all and determine the printed width of it
  long long max_value = newLine;
  if (words > max_value)
    max_value = words;
  if (bytes > max_value)
    max_value = bytes;
  int dim = snprintf(NULL, 0, "%lld", max_value);

  // print lines, words, bytes and filename aligned to the longest number
  printf("%*lld %*lld %*lld %s\n", dim, newLine, dim, words, dim, bytes, thefile);
  return 0;
}

The script that I was trying:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>

#define LUNG_BUF 4096

// Attempted mmap-based rewrite of the line/word/byte counter.
// It is meant to print the line, word and byte counts of argv[1] followed by
// the file name, but as written it has several problems; each one is marked
// with a NOTE(review) comment at the line where it occurs.
int main(int argc, char** argv)
{
  int bytes = 0;
  int words = 0;
  int newLine = 0;
  int max_value;  // the maximum of the above three
  int dim;        // string width of the max value

  char buffer[LUNG_BUF];
  enum states { WHITESPACE, WORD };
  int state = WHITESPACE;
  if ( argc !=2 )
  {
    printf( "No file name\n%s", argv[0]);
  }
  else
  {
    int file = open(argv[1], O_RDONLY);

    if(file < 0)
    {
      printf("can not open :%s\n",argv[1]);
    }
    else
    {
      char *thefile = argv[1];
      // NOTE(review): n is left over from the read() version and is never used here.
      size_t n;
      // NOTE(review): sb is declared but no stat()/fstat() call fills it in, so
      // sb.st_size below is read uninitialized. struct stat and mmap() also need
      // <sys/stat.h> and <sys/mman.h>, which are not among the includes shown
      // above this function.
      struct stat sb;
      // NOTE(review): `fd` is not declared anywhere in this snippet; the open
      // descriptor is named `file`. The mmap() result is not compared against
      // MAP_FAILED before use.
      char *file_in_memory = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
      // NOTE(review): `<=` makes the loop run st_size + 1 times, and buffer only
      // has LUNG_BUF elements, so buffer[i] can be written past its end.
      for(int i=0;i<=sb.st_size;i++)
      {
        // NOTE(review): this loop only ever touches `buffer`; file_in_memory is
        // never read. On the first pass buffer[0] becomes '\0' and nothing
        // changes it afterwards, so `while (*ptr)` below ends immediately every
        // time and bytes, words and newLine all stay at 0.
        buffer[i] = '\0';
        char *ptr = buffer;
        while (*ptr)
        {
          bytes++;
          if (*ptr == ' ' || *ptr == '\t')
          {
            state = WHITESPACE;
          }
          else if (*ptr == '\n')
          {
            newLine++;
            state = WHITESPACE;
          }
          else
          {
            // a non-whitespace byte right after whitespace starts a new word
            if (state == WHITESPACE)
            {
                words++;
            }
            state = WORD;
          }
          ptr++;
        }
      }

      // find out the largest value of all and determine the printed width of it
      max_value = newLine;
      if (words > max_value)
        max_value = words;
      if (bytes > max_value)
        max_value = bytes;
      dim = snprintf(NULL, 0, "%d", max_value);

      // print lines, words, bytes and filename aligned to the longest number
      printf("%*d %*d %*d %s\n", dim, newLine, dim, words, dim, bytes, thefile);
      // release the mapping and the descriptor (only reached on the success path)
      munmap(file_in_memory, sb.st_size);
      close(file);
    }
  }
}

Solution

  • The code you posted above didn't compile and had quite a few problems. I've tidied it up a bit below, hopefully this will help. I tried not to change it too much so you could see what I did.

    You hadn't actually called stat, and the fd variable you passed to mmap was not the variable you used to open the file. I would recommend always compiling your code with "-Wall -Werror" if you can, so that mistakes like these are reported at compile time.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>
    #include <fcntl.h>
    #include <sys/types.h>
    #include <sys/stat.h>
    #include <sys/mman.h>
    
    /*
     * wc-style counter built on mmap().
     *
     * Usage: <prog> <file>
     * Prints "<lines> <words> <bytes> <file>", each number right-aligned to
     * the width of the byte count. A word is a maximal run of bytes other than
     * space, tab and newline; a line is counted for every '\n'.
     *
     * Exits with a non-zero status if the argument count is wrong or the file
     * cannot be opened, stat'ed or mapped.
     */
    int main(int argc, char** argv)
    {
      if ( argc !=2 )
      {
        printf( "No file name\n%s", argv[0]);
        exit(-1);
      }

      char *fileName = argv[1];

      int file = open(fileName, O_RDONLY);
      if(file < 0)
      {
        perror("Error: ");
        exit(-1);
      }

      // fstat() on the descriptor we already hold: it describes the very file
      // that will be mapped (the path could be replaced between open() and a
      // path-based stat()), and its result is checked before st_size is used.
      struct stat sb = {0};
      if (fstat(file, &sb) < 0)
      {
        perror("Error:");
        close(file);
        exit(-1);
      }

      size_t length = (size_t)sb.st_size;
      char *filePtr = NULL;

      // mmap() fails with EINVAL for a zero length, so an empty file is not
      // mapped at all and simply reports 0 0 0.
      if (length > 0)
      {
        filePtr = mmap(NULL, length, PROT_READ, MAP_PRIVATE, file, 0);
        if (filePtr == MAP_FAILED)
        {
          perror("Error:");
          close(file);
          exit(-1);
        }
      }

      // long long so that sizes above INT_MAX are neither truncated nor overflow
      long long bytes = sb.st_size;
      long long words = 0;
      long long newLine = 0;

      enum states { WHITESPACE, WORD };
      int state = WHITESPACE;

      // Valid offsets are 0 .. length-1; the byte at `length` lies outside the file.
      for (size_t pos = 0; pos < length; pos++)
      {
        char c = filePtr[pos];

        if (c == '\n')
        {
          newLine++;
        }

        if (c == ' ' || c == '\t' || c == '\n')
        {
          state = WHITESPACE;
        }
        else if (state == WHITESPACE)
        {
          // Count a word when it starts, so that a last word with no
          // whitespace after it is still included.
          state = WORD;
          words++;
        }
      }

      // Max value is always bytes: every line and every word needs at least one byte

      int dim = snprintf(NULL, 0, "%lld", bytes);

      // print lines, words, bytes and filename aligned to the longest number
      printf("%*lld %*lld %*lld %s\n", dim, newLine, dim, words, dim, bytes, fileName);

      if (filePtr != NULL)
      {
        munmap(filePtr, length);
      }
      close(file);
      return 0;
    }