Here are two problems in the program First, is that when I uncomment the pthread_join() in the main function, there will be a seg fault, other wise the program will run... Second, is that the output file will be missing the first letter of each word that has stored in the global variable words from last read file. So, for example, there are two files:
one has words "abc abc abc abc abc abc abc abc".
the second has words "def def"
if i input 5 for the second argument when calling a.out, the output in the output file will be abc abc abc abc abc bc bc bc def def This is also a werid thing I could not figure out why.
/* main.c */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <dirent.h>
#include <ctype.h>
#include <pthread.h>
#include "hw3.h"
int index_;
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
typedef struct files
{
char *inputfile;
FILE * outputfile;
} files;
void * readFile( void *arg ){
files *info = (files *)arg;
char fileName[80];
strncat(fileName, (info->inputfile), 79);
fileName[80] = '\0';
FILE *outputfd = info->outputfile;
FILE* fd;
fd = fopen(fileName, "r");
if ( fd == NULL) {
fprintf(stderr, "ERROR:<open() failed>\n");
}
printf("TID %d: Opened \"%s\"\n", (unsigned int)pthread_self(), fileName);
fflush(stdout);
int rc;
char ch[1] = {0};
char word[80] = {0};
ch[0] = fgetc(fd);
pthread_mutex_lock(&mutex);
while( ch[0] != EOF){
if( isalnum(ch[0]) ){
// char str = ch[0];
strncat(word, ch, 1);
}
else{//it's a word
if( strlen( word ) >= 2 ){
words[index_] = word;
printf("TID %d: Stored \"%s\" in shared buffer at index [%d]\n",(unsigned int)pthread_self(), word, index_ );
if( index_+ 1 == maxwords ){
index_ = 0;
printf("MAIN: Buffer is full; writing %d words to output file\n", maxwords);
for( unsigned int i = 0; i<maxwords; i++ ){
rc = fwrite( words[i], 1, sizeof(words[i]), outputfd );
fwrite( "\n", 1, sizeof("\n"), outputfd );
if( rc == -1 ){
fprintf(stderr, "ERRPR:<write() failed>\n");
//return EXIT_FAILURE;
}
}
}
else{
index_ ++;
}
}
for(int i = 0; i< strlen(word); i++){
word[i] = '\0';
}
}
ch[0] = fgetc(fd);
}
pthread_mutex_unlock(&mutex);
printf("TID %d: Closed \"%s\"; and exiting\n", (unsigned int)pthread_self(), fileName );
fclose(fd);
pthread_exit( NULL );
}
int main( int argc, char * argv[] ){
if(argc != 4){
fprintf(stderr, "ERROR: Invalid arguments\nUSAGE: ./a.out <input-directory> <buffer-size> <output-file>\n");
return EXIT_FAILURE;
}
//dynamically allocated words buffer with argument 2
maxwords = atoi(argv[2]);
words = (char**)calloc(maxwords, sizeof(char*) );
if ( words == NULL)
{
fprintf( stderr, "ERROR:<word calloc() failed\n>" );
return EXIT_FAILURE;
}
printf("MAIN: Dynamically allocated memory to store %d words\n", maxwords);
fflush(stdout);
//open/create output file of the third argument
FILE* outputfd = fopen (argv[3], "w");
if ( outputfd == NULL )
{
perror( "open() failed" );
return EXIT_FAILURE;
}
DIR * dir = opendir( argv[1] );
if(dir == NULL){
perror("ERRPR:<opendir() failed>");
return EXIT_FAILURE;
}
chdir(argv[1]);
printf("MAIN: Opened \"%s\" directory\n", argv[1]);
fflush(stdout);
pthread_t tid[10];
index_ = 0;
int i = 0;//files index
struct dirent * file;
//files allfiles[20];
char fileName[80];
int rc;
//-----------------------------------------------------------------------
// while loop reads all files in the directory
while ( ( file = readdir( dir ) ) != NULL )
{
struct stat buf;
rc = lstat( file->d_name, &buf ); /* e.g., "xyz.txt" */
/* ==> "assignments/xyz.txt" */
if ( rc == -1 ){
fprintf(stderr, "ERRPR:<lstat() failed>\n");
return EXIT_FAILURE;
}
if ( S_ISREG( buf.st_mode ) )
{
// printf( " -- regular file\n" );
// fflush(stdout);
strncpy(fileName, file->d_name, 79);
files info;
info.inputfile = fileName;
info.outputfile = outputfd;
//printf("%d",i);
printf("MAIN: Created child thread for \"%s\"\n",fileName);
rc = pthread_create( &tid[i], NULL, readFile,(void *)&info );
sleep(1);
i++
}
else if ( S_ISDIR( buf.st_mode ) )
{
// printf( " -- directory\n" );
// fflush(stdout);
}
else
{
// printf( " -- other file\n" );
// fflush(stdout);
}
}
closedir(dir);
printf("MAIN: Closed \"%s\" directory\n", argv[1]);
fflush(stdout);
printf("MAIN: Created \"%s\" output file\n",argv[3]);
fflush(stdout);
//-----------------------------------------------------------------------
for( int j = 0; j<i; j++){
printf( "MAIN: Joined child thread: %u\n", (unsigned int)tid[j] );
pthread_join(tid[i], NULL);
}
for( unsigned int i = 0; i<index_; i++ ){
int rc = fwrite( words[i], 1, sizeof(words[i]), outputfd );
if( rc == -1 ){
fprintf(stderr, "ERRPR:<write() failed>\n");
return EXIT_FAILURE;
}
}
printf( "MAIN: All threads are done; writing %d words to output file\n", index_);
fflush(stdout);
free( words );
fclose( outputfd );
return EXIT_SUCCESS;
}
This here is the whole program, and there is a header file which is just two global variab
char ** words = NULL;
/* global/shared integer specifying the size */
/* of the words array (from argv[2]) */
int maxwords;
Thanks to everyone for the help!
You need separate info
objects for each thread. Right now, all of the threads get the same info
object, which you change in between creating threads, and therefore, for most of them, by the time they get a chance to look at the name of the file they are supposed to process, it has been changed.
The segmentation fault is being caused by code you have not shown us, so I can't help you with that except to suggest that you apply valgrind
.