Search code examples
cjustify

How to delete Text in C?


Here is the problem. I am given a huge text file that contains a lot of extra blank space. I must write a program that removes the extra blank space, builds lines that are exactly 80 characters long without splitting any word, and aligns the text to the left and right margins simultaneously (justified text). The text is justified by placing additional spaces between words so that each line starts with a word, ends with a word, and is exactly 80 characters long.

Yes, this is homework, but I am allowed to get any kind of online help. So far my code does everything except align (justify) the text:

Code:

#include <stdio.h>
#include "catalin.h"

/*
 * Reads asimov.in, turns newlines into spaces, collapses runs of spaces with
 * blankremove(), wraps the result at 80 columns with wrap() and writes it to
 * out.out.
 *
 * Input beyond the capacity of the text buffer is ignored.
 *
 * Returns 0 on success and 1 if either file cannot be opened.
 */
int main()
{
   /* static storage: about 1.6 MB of buffers is too much to put on the stack */
   static char text[145000], blank[1450000];
   FILE *input, *output;
   int c;                     /* int, not char, so EOF stays distinguishable */
   int f=80;                  /* target line width */
   size_t i=0;

   input = fopen("asimov.in", "r");
   if (input == NULL) {
      perror("asimov.in");
      return 1;
   }
   /* stop one short of the buffer size to leave room for the terminator */
   while (i < sizeof text - 1 && (c=fgetc(input))!=EOF){
      if (c=='\n') c=' ';     /* newlines become ordinary word separators */
      text[i]=(char)c;
      i++;
   }
   text[i]='\0';              /* blankremove() scans until the terminator */
   fclose(input);

   blankremove(text,blank);
   wrap(blank,f);

   output = fopen("out.out", "w");
   if (output == NULL) {
      perror("out.out");
      return 1;
   }
   fputs(blank, output);      /* file data must never be used as a format string */
   fclose(output);
   return 0;
}

/*
 * Copies text into blank, replacing every run of consecutive spaces with a
 * single space.
 *
 * text  - NUL-terminated input string
 * blank - destination buffer; must be able to hold as many bytes as text,
 *         terminator included
 *
 * Returns the length of the string written to blank.
 */
int blankremove(char text[], char blank[])
{
   int c = 0, d = 0;
   while (text[c] != '\0') {
      /* inside a run of spaces, skip ahead so only the last one is copied */
      while (text[c] == ' ' && text[c + 1] == ' ') {
         c++;
      }
      blank[d] = text[c];
      c++;
      d++;
   }
   blank[d] = '\0';
   return d;
}

/*
 * Word-wraps s in place by replacing selected spaces with newlines so that
 * lines are at most wrapline characters long.
 *
 * A word longer than wrapline cannot be split; it is left on an over-long
 * line of its own and the line is ended at the first space after it.
 *
 * After wrapping, a ruler (wrapline spaces followed by '|') is printed to
 * stdout.
 *
 * s        - NUL-terminated text, modified in place
 * wrapline - maximum line width
 */
void wrap(char s[], const int wrapline)
{
    int i, k, wraploc, lastwrap;

    lastwrap = 0;   /* index of the first character of the current line */
    wraploc = 0;    /* column of s[i] within the current line */

    for (i = 0; s[i] != '\0'; ++i, ++wraploc) {
        if (wraploc >= wrapline) {
            int broke = 0;

            /*
             * Look backwards for the last space that still fits. The scan
             * stops at the start of the current line: going further back
             * would break lines that are already finished.
             */
            for (k = i; k > lastwrap; --k) {
                if (k - lastwrap <= wrapline && s[k] == ' ') {
                    s[k] = '\n';
                    lastwrap = k + 1;
                    broke = 1;
                    break;
                }
            }
            /* over-long word: end the line at the first space after it */
            if (!broke && s[i] == ' ' && i > lastwrap) {
                s[i] = '\n';
                lastwrap = i + 1;
            }
            wraploc = i - lastwrap;
        }
    }
    for (i = 0; i < wrapline; ++i) printf(" ");
    printf("|\n");
}

All I need is some help on creating a function that justifies the text. "justified—text is aligned along the left margin, and letter- and word-spacing is adjusted so that the text falls flush with both margins, also known as fully justified or full justification;" The spaces created when doing justification should be placed uniformly. No libraries should be used other than the default.


Solution

  • Using fscanf will read words and exclude whitespace.
    Then add words while the length of the line is less than 80.
    Add extra spaces to right justify the line.

    #include <stdio.h>
    
    int len ( char *str);
    char *cat ( char *to, char *from);
    char *cpy ( char *to, char *from);
    char *lastchr ( char *str, int ch);
    char *justify ( char *str, int wordcount, int width);
    
    //reads whitespace separated words from asimov.in, packs them into lines
    //and writes every line to out.out after justify has padded it to 80 columns.
    //returns 0 on success and 1 if either file cannot be opened.
    int main( void) {
        char word[100] = "";
        char line[100] = "";
        char filenamein[] = "asimov.in";
        char filenameout[] = "out.out";
        int length = 0;
        int wordcount = 0;
        int pending = 0;
        FILE *pfin = NULL;
        FILE *pfout = NULL;
        if ( NULL == ( pfin = fopen ( filenamein, "r"))) {
            perror ( filenamein);
            return 1;
        }
        if ( NULL == ( pfout = fopen ( filenameout, "w"))) {
            perror ( filenameout);//report first. fclose could overwrite errno
            fclose ( pfin);
            return 1;
        }

        //read a word from file. will exclude whitespace
        //%79s: a longer word could never fit an 80 column line. it is split instead
        while ( 1 == fscanf ( pfin, "%79s", word)) {
            length = len ( word);
            if ( 80 > len ( line) + length) {//add to line if it will fit
                cat ( line, word);
                cat ( line, " ");
                wordcount++;//needed in case more than one extra space per word
            }
            else {//adding last word would be more than 80
                justify ( line, wordcount, 80);

                fprintf ( pfout, "%s\n", line);

                cpy ( line, word);//copy pending word to line
                cat ( line, " ");//add a space
                wordcount = 1;//reset wordcount
            }
            pending = 1;//either branch leaves at least one unwritten word in line
        }
        if ( pending) {
            justify ( line, wordcount, 80);
            fprintf ( pfout, "%s\n", line);
        }

        fclose ( pfin);
        fclose ( pfout);

        return 0;
    }
    
    //number of characters in str before the terminating zero
    int len ( char *str) {
        char *scan = str;

        for ( ; 0 != *scan; ++scan) {
            //walk to the terminating zero
        }
        return ( int)( scan - str);
    }
    
    //append from to the end of to. returns to
    //to must have room for both strings and the terminating zero
    char *cat ( char *to, char *from) {
        char *tail = to;

        for ( ; *tail; ++tail) {
            //find the terminating zero of to
        }
        for ( ; *from; ++from, ++tail) {
            *tail = *from;//copy one character
        }
        *tail = 0;//terminate
        return to;
    }
    
    //replace the contents of to with from. returns to
    char *cpy ( char *to, char *from) {
        to[0] = 0;//make to an empty string so that cat writes from its start
        return cat ( to, from);//cat returns its first argument
    }
    
    //find the last occurrence of ch in str
    //returns a pointer to it or NULL if ch does not occur before the terminating zero
    char *lastchr ( char *str, int ch) {
        char *match = NULL;

        for ( char *scan = str; *scan; ++scan) {
            if ( *scan == ch) {
                match = scan;//remember it. a later match replaces it
            }
        }
        return match;
    }
    
    //pad str in place to exactly width characters by widening the gaps between words.
    //trailing spaces are removed first. the missing columns are then shared out
    //over the remaining spaces as evenly as possible. when they do not divide
    //evenly the rightmost gaps each receive one more.
    //  str        zero terminated line. its buffer must hold width + 1 characters
    //  wordcount  kept for existing callers. the gaps are counted from str itself
    //  width      desired line length
    //returns str. a line without an interior space, or one that is already at
    //least width long, only has its trailing spaces removed.
    char *justify ( char *str, int wordcount, int width) {
        int length = 0;
        int gaps = 0;

        ( void) wordcount;

        while ( str[length]) {//measure the line
            length++;
        }
        while ( length > 0 && ' ' == str[length - 1]) {//drop trailing spaces
            length--;
        }
        str[length] = 0;

        for ( int each = 0; each < length; ++each) {//count the places that can grow
            if ( ' ' == str[each]) {
                gaps++;
            }
        }

        int addspaces = width - length;//number of spaces needed
        if ( addspaces <= 0 || 0 == gaps) {
            return str;//nothing to add or nowhere to add it
        }
        int share = addspaces / gaps;//every gap receives this many
        int spare = addspaces % gaps;//this many gaps receive one more

        //copy from right to left so no character is overwritten before it is moved
        int from = length - 1;
        int to = width - 1;
        str[width] = 0;//terminate the padded line
        while ( from >= 0) {
            char ch = str[from];
            str[to] = ch;
            to--;
            if ( ' ' == ch) {//found a gap. widen it
                int pad = share;
                if ( spare > 0) {
                    pad++;
                    spare--;
                }
                while ( pad > 0) {
                    str[to] = ' ';
                    to--;
                    pad--;
                }
            }
            from--;
        }
        return str;
    }
    

    EDIT:

    #include <stdio.h>
    
    #define WIDTH 80
    #define SIZE ( WIDTH + 20)
    
    int len ( char *str);
    char *cat ( char *to, char *from);
    char *cpy ( char *to, char *from);
    char *lastchr ( char *str, int ch);
    char *justify ( char *str, int wordcount, int width);
    int scanword ( FILE *pfread, int size, char *word);
    
    //reads words from asimov.in with scanword, packs them into lines of at most
    //WIDTH columns, pads every line with justify and writes it to out.out.
    //a newline marker from scanword ends the current line and adds a blank line.
    //returns 0 on success and 1 if either file cannot be opened.
    int main( void) {
        char word[SIZE] = "";
        char line[SIZE] = "";
        char filenamein[] = "asimov.in";
        char filenameout[] = "out.out";
        int length = 0;
        int wordcount = 0;
        int pending = 0;
        FILE *pfin = NULL;
        FILE *pfout = NULL;
        if ( NULL == ( pfin = fopen ( filenamein, "r"))) {
            perror ( filenamein);
            return 1;
        }
        if ( NULL == ( pfout = fopen ( filenameout, "w"))) {
            perror ( filenameout);//report first. fclose could overwrite errno
            fclose ( pfin);
            return 1;
        }

        while ( 1 == scanword ( pfin, WIDTH, word)) {//read a word from file
            length = len ( word);
            if ( '\n' != word[0] && WIDTH > len ( line) + length) {//add to line if it will fit
                if ( 0 != word[0]) {
                    cat ( line, word);
                    cat ( line, " ");
                    wordcount++;//needed in case more than one extra space per word
                    pending = 1;//a line is pending
                }
            }
            else {//paragraph or adding last word would be more than WIDTH
                if ( len ( line)) {//line has content
                    justify ( line, wordcount, WIDTH);

                    fprintf ( pfout, "%s\n", line);
                }
                if ( '\n' == word[0]) {
                    fprintf ( pfout, "\n");//print a blank line for paragraph
                }

                line[0] = 0;
                wordcount = 0;//reset wordcount
                pending = 0;//nothing pending so far
                if ( 0 != word[0] && '\n' != word[0]) {//word is not empty and is not newline
                    cpy ( line, word);//copy pending word to line
                    cat ( line, " ");//add a space
                    wordcount = 1;//reset wordcount
                    pending = 1;//the copied word still has to be written
                }
            }
        }
        if ( pending) {//print pending line
            if ( len ( line)) {//line has content
                justify ( line, wordcount, WIDTH);
                fprintf ( pfout, "%s\n", line);
            }
        }

        fclose ( pfin);
        fclose ( pfout);

        return 0;
    }
    
    //read the next word from pfread into word.
    //  pfread  stream to read from
    //  size    capacity of word. at most size - 1 characters are stored
    //  word    receives the zero terminated result
    //returns 1 when there is something to process and 0 at end of file.
    //the result in word is one of
    //  a word. leading spaces and tabs are skipped. a space, tab, newline or
    //          end of file ends it. a word longer than size - 1 characters is
    //          handed out in pieces
    //  ""      a single newline was read before any word character
    //  "\n"    a newline directly followed another newline
    //the previous character is remembered in a static variable, so calls for
    //different streams must not be interleaved.
    int scanword ( FILE *pfread, int size, char *word) {
        static int nl = 0;//static to retain value between function calls
        int ch = 0;
        int max = size - 1;//max characters that can fit in word and leave one to terminate
        int stored = 0;//characters placed in word so far

        *word = 0;//first character. zero terminate. empty line
        while ( stored < max) {//room for another character
            ch = fgetc ( pfread);
            if ( EOF == ch) {//end of file
                return stored > 0;//process the characters that were read, if any
            }
            if ( 0 == ch) {
                continue;//a zero byte would cut the string short. skip it
            }
            if ( '\n' == ch) {//read a newline
                if ( '\n' == nl) {//consecutive newlines. no character was stored since
                    word[stored] = '\n';
                    word[stored + 1] = 0;
                    return 1;
                }
                nl = ch;//set for first single newline
                return 1;
            }
            nl = 0;//reset to zero as this character is not a newline
            if ( ' ' == ch || '\t' == ch) {//read space or tab
                if ( 0 == stored) {//no characters in word so far
                    continue;//consume leading space and tab
                }
                return 1;//process the word read
            }
            word[stored] = ch;//assign character to word
            stored++;
            word[stored] = 0;//zero terminate
        }
        //word is full. hand it out so the caller does not mistake this for end of file
        return stored > 0;
    }
    
    //count the characters in str up to, not including, the terminating zero
    int len ( char *str) {
        int count = 0;

        while ( 0 != str[count]) {//str[count] is not the terminating zero
            count++;
        }
        return count;
    }
    
    //append the characters of from to the end of to. returns to
    //to must have room for both strings and the terminating zero
    char *cat ( char *to, char *from) {
        int at = 0;

        while ( 0 != to[at]) {//find the terminating zero of to
            at++;
        }
        for ( int each = 0; 0 != from[each]; ++each) {//every character of from
            to[at] = from[each];
            at++;
        }
        to[at] = 0;//terminate
        return to;
    }
    
    //overwrite to with a copy of from. returns to
    char *cpy ( char *to, char *from) {
        to[0] = 0;//empty to first. cat then appends at its start
        return cat ( to, from);//cat hands back its first argument
    }
    
    //locate the last character in str that equals ch
    //returns a pointer to it or NULL when there is none. the terminating zero is never matched
    char *lastchr ( char *str, int ch) {
        char *scan = str;

        while ( *scan) {//advance to the terminating zero
            scan++;
        }
        while ( scan > str) {//walk back. the first match seen is the last one in str
            scan--;
            if ( *scan == ch) {
                return scan;
            }
        }
        return NULL;
    }
    
    //widen the gaps between the words of str, in place, until it is width characters long.
    //trailing spaces are removed first. the missing columns are then divided
    //over the remaining spaces as evenly as possible. a remainder is handed
    //out one column per gap starting with the rightmost gap.
    //  str        zero terminated line. its buffer must hold width + 1 characters
    //  wordcount  kept for existing callers. the gaps are counted from str itself
    //  width      desired line length
    //returns str. a line without an interior space, or one that is already at
    //least width long, only has its trailing spaces removed.
    char *justify ( char *str, int wordcount, int width) {
        int length = 0;
        int gaps = 0;
        int addspaces = 0;

        ( void) wordcount;

        for ( length = 0; str[length]; ++length) {
            //measure the line
        }
        while ( length > 0 && ' ' == str[length - 1]) {
            length--;//drop trailing spaces
        }
        str[length] = 0;

        for ( int each = 0; each < length; ++each) {
            if ( ' ' == str[each]) {
                gaps++;//a place that can grow
            }
        }

        addspaces = width - length;//difference is number of spaces needed
        if ( 0 == gaps || addspaces <= 0) {
            return str;//nowhere to add spaces or none needed
        }

        int share = addspaces / gaps;//every gap receives this many
        int spare = addspaces % gaps;//this many gaps receive one more

        //move characters right to left so none is overwritten before it is moved
        char *end = str + width;
        *end = 0;//terminate the padded line
        for ( int from = length - 1; from >= 0; --from) {
            char ch = str[from];
            end--;
            *end = ch;//shift character toward end
            if ( ' ' != ch) {
                continue;
            }
            int pad = share;//found a gap. widen it
            if ( spare > 0) {
                pad++;
                spare--;
            }
            for ( ; pad > 0; --pad) {
                end--;
                *end = ' ';
            }
        }
        return str;
    }