Search code examples
arrayscpointersdynamic-memory-allocation

Having trouble reusing a dynamically allocted array of pointers: C Language


.h File

#ifndef MDSHELL_H
#define MDSHELL_H

#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <ctype.h>

void getCommand(char * command, char *commandcopy);
void getNumOfTokens(char *command, char ***arrayofTokens, int *numberofTokens);
void getTokens(char *commandcopy, char ***arrayofTokens, int *numberofTokens);


#endif

main.c file:

#include <stdio.h>
#include <stdlib.h>
#include "mdshell.h"



int main()
{
  printf("Created by Matthew Lewis\n");
  printf("Type \"help\" to see implemented commands\n\n");

  char *command= NULL;
  char *commandcopy = NULL;
  char **arrayofTokens;
  int numberofTokens;

  while(command == NULL || (strcmp(command, "exit")) != 0)
  {
    if(command == NULL)
    {
      command = malloc(100 * sizeof(char));
      commandcopy = malloc(100 * sizeof(char));
    }
    else{
      free(command);
      free(commandcopy);
      command = malloc(100 * sizeof(char));
      commandcopy = malloc(100 * sizeof(char));
    }
    getCommand(command, commandcopy);
    getNumOfTokens(command, &arrayofTokens, &numberofTokens);
    getTokens(commandcopy, &arrayofTokens, &numberofTokens);

    for(int i = 0; i < numberofTokens; i++)
    { //used for testing to see tokens being returned. will not be in final build
      printf("%s\n", arrayofTokens[i]);
    }

    if((strcmp(command, "help")) == 0)
    {
      printf("pwd - Print the current working directory\n");
      printf("exit - Exit shell\n");
      printf("help - Display this message\n");
      printf("<UNIX cmd>  - Spawn child process, excecute <UNIX cmd> in the foreground\n\n");
    }

    for (int i = 0; i < numberofTokens; i++)
    {
      free(arrayofTokens[i]); //free each element in array of pointers
    }
    free(arrayofTokens);  //frees the array of pointers

  }

mdshell.c file

#include "mdshell.h"
#include <stdio.h>
#include <stdlib.h>


void getCommand(char * command, char *commandcopy)
{
  char word[100];
  int size = 0;
  printf("mdshell> ");
  fgets(word, 100, stdin);

  // removes the newline char at the end of whatever user entered
  size = strlen(word);
  word[size - 1] = '\0';

  strcpy((command), word);
  strcpy((commandcopy), word);
}


void getNumOfTokens(char *command, char ***arrayofTokens, int *numberofTokens)
{   
  int isDone = 0;
  char delim[] = " ";
  

  while(!isDone)    // used to find out how many words there are
  {
    if((*numberofTokens) == 0)
    {
      char *word = strtok(command, delim);
      if(word == NULL)
      {
        isDone = 1;
        (*numberofTokens)++;
      }
      else
      (*numberofTokens)++;
    }
    else
    {
      char *word = strtok(NULL, delim);
      if(word == NULL)
      {
        isDone = 1;
        (*numberofTokens)++;
      }
      else
      (*numberofTokens)++;
    }
  }

  //dynamically allocate the array of pointers to the number of elements needed
  (*arrayofTokens) = malloc((*numberofTokens) * sizeof(char *));

}

void getTokens(char *commandcopy, char ***arrayofTokens, int *numberofTokens)
{
  char delim[] = " ";
  int size = 0;
  int arrElement = 0;
  
  char *word = strtok(commandcopy, delim);
  if(word != NULL)
  {
    size = strlen(word);
    (*arrayofTokens)[arrElement] = malloc((size) * sizeof(char));
    strcpy((*arrayofTokens)[arrElement], word);
    arrElement++;
  }
  else
  {
    (*arrayofTokens)[arrElement] = NULL;
  }
  while(word != NULL)
  {
    word = strtok(NULL, delim);
    if(word == NULL)
    {
      (*arrayofTokens)[arrElement] = malloc((size) * sizeof(char));
    }
    else
    {
      size = strlen(word);
      (*arrayofTokens)[arrElement] = malloc((size) * sizeof(char));
      strcpy((*arrayofTokens)[arrElement], word);
      arrElement++;
    }
  }
}

I am writing a little shell program for linux. I have the user enter a command that I then break up into an null terminated array or pointers to char that will be needed for the execvp() function. I have not gotten to that part yet. execvp() needs a null terminated array of strings. I have getting the string into an array of strings figured out except for getting null at the end of the string. It just shows blank instead of (NULL) even when I assign that element to NULL

Here is example of output I am getting:

output example

I have a feeling I am not freeing memory correctly for arrayofTokens. I am freeing each dynamically allocated part so I dont know why I am getting segmentation faults. Any help would be appreciated so I can move on to the rest of the assignment. Have a good day!

I was expecting to be able to use arrayofTokens multiple times while using different commands in my shell. this is not working after the first pass.


Solution

  • It's obvious you've really tried hard to get this to work. Kudos!

    Hidden in the heaps of code was the bug pointed out by @BoP in the comments. Perhaps the lesson is to strive to write less code.

    Comments:

    1. How many calls to malloc() go unchecked? Don't assume things always work!
    2. MDShell.h - Good use of "header guards", but... Avoid adding #includes to header files when unnecessary. It suggests the content of those headers is relevant to the rest of this header file. Their presence here is both unnecessary and distracting.
    3. Shorter variable names would be easier to scan. Use descriptive names, by all means, but don't force your reader to slow down to examine polysyllabic monstrosities, especially when one or more are almost indistinguishable.
    4. void getCommand() is passed a buffer, but uses its own initially. Why? And, if the program needs a 2nd copy, this is not the place to make the clone.
    5. void getNumOfTokens() takes 3 parameters, then squanders its return value by being a void function. The # of tokens could be returned to the caller, and the function signature simplified. (The looping if/else could be vastly simplified.)
    6. void getTokens()... Again, code duplication hides a 2nd bug. Not only is the allocated buffer size too short for the trailing '\0', but the value of argv[argc] is meant to be NULL. Too much timid and tentative code makes it difficult to see what's going wrong.
    7. main()??? Thrashing dynamic memory with malloc() and free() indicates the code needs to be rethought.
    8. (Opinion:) What is the value of indenting blocks by only 2 spaces? To be able to write longer lines of code? Long lines of code are harder for the reader to scan (see the code-golf example below.) My experience is that an indent of 4 spaces makes clear the subordinate expressions or blocks of if(), for(), while(), etc. With proper attention and care, this lessens the necessity of the filigree of curly braces, make the code even easier to scan. Just an opinion...

    Below is a single file re-write of your code. I hope this points you in a better direction.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    #define MAX_CMD_LEN 128
    
    char **tokenise( char *str, int *n ) {
        static const char *dlm = " \t";
        char cpy[ MAX_CMD_LEN ]; // disposable copy
        strcpy( cpy, str );
    
        // chop disposable copy to determine # of tokens (pointers)
        // start with '1' to allow for extra NULL as sentinel at the end of array
        int i = 1;
        for( char *cp = cpy; (cp = strtok(cp, dlm)) != NULL; cp = NULL )
            i++;
    
        char **arr = calloc( i, sizeof *arr ); // use calloc for consistency
        /* verification of allocation success left as an exercise */
    
        // chop actual buffer storing pointers to each segment
        i = 0;
        for( char *xp = str; (xp = strtok(xp, dlm)) != NULL; xp = NULL )
            arr[ i++ ] = xp;
    
        *n = i - 1; // # of tokens is 1 less than pointers assigned
        return arr;
    }
    
    int main( void ) {
        for( ;; ) {
            char iBuf[ MAX_CMD_LEN ]; // small stack buffer
    
            printf( "mdshell> " );
            fgets( iBuf, sizeof iBuf, stdin );
            iBuf[ strcspn( iBuf, "\n" ) ] = '\0'; // trim LF from end
    
            if( strcmp( iBuf, "help" ) == 0 ) {  // early resolution
                puts(
                    "pwd - Print the current working directory\n"
                    "exit - Exit shell\n"
                    "help - Display this message\n"
                    "<UNIX cmd>  - excecute <UNIX cmd> in the foreground\n"
                );
                continue;
            }
    
            int arrgc;
            char **arrgv = tokenise( iBuf, &arrgc ); // KISS
    
            // demonstrate results
            for( int i = 0; i <= arrgc; i++)
                printf( "[%d] '%s'\n", i, arrgv[i] );
    
            free( arrgv ); // easy...
        }
    
        return 0;
    }
    

    Results:

    mdshell> fools rush in where wise men fear to go
    [0] 'fools'
    [1] 'rush'
    [2] 'in'
    [3] 'where'
    [4] 'wise'
    [5] 'men'
    [6] 'fear'
    [7] 'to'
    [8] 'go'
    [9] '(null)'
    mdshell>      // just pressed RETURN here
    [0] '(null)'
    mdshell>
    

    NB! My compiler is kind, printing (null) instead of crashing. The mileage you get from another implementation may differ.


    EDIT:
    Bothered that tokenise(), above, has a second (disposable) stack buffer (size matters) solely to chop and count tokens, here's an alternative version (with code-golf-ish leanings.) This version of tokenise() does not depend on the compiler token MAX_CMD_LEN and can be moved into a source file of utility (helper) functions.

    In short, the following makes two passes over the buffer to be examined. The first pass invokes strtok() to chop and count how many tokens are found. Then, from the last token (lw), the buffer is healed and the array of pointers allocated ahead of the second pass that fills the array. This may make for interesting reading. It's minimal, and bound to raise eyebrows (and objections.)

    #include <stdlib.h>
    #include <string.h>
    
    char **tokenise( char *str, int *n ) {
        char *cp, *lw = str, **arr = NULL;
        for( int i = 0, stp = 2; stp--; i = 0 ) {
            for( cp = str; (cp = strtok(cp, " \t")) != NULL; cp = NULL, i++ )
                if( stp ) lw = cp; else arr[ i ] = cp;
            if( stp ) {
                while( lw > str ) if( !*--lw ) *lw = ' '; // 'healing' the buffer
                arr = calloc( (*n = i) + 1, sizeof *arr );
                if( arr == NULL ) return arr; // caller to handle error condition
            }
        }
        return arr;
    }
    

    NB: when probing toward the beginning of a string, care is necessary to avoid the UB trap of comparing pointers to the left of an array.

    It's worth comparing this function with the OP's custom version. This version makes no presumption about the content of the string, whereas the parameter names of the OP's version suggest it is single purpose. Strive to write utility functions that may be used in other circumstances. Perhaps the delimiter string should/could be passed to the function as another parameter, instead of being a hard-wired single string...