Here is basically the problem. I am given a huge text file that contains a lot of blank spaces. I must write a program that removes the extra blank spaces, creates lines exactly 80 characters long without splitting any word, and aligns the text to the left and right margins simultaneously (justified text). The text is justified by placing additional spaces between words so that each line starts with a word and ends with a word while being exactly 80 characters long.
Yes, this is homework, but I am allowed to get any kind of online help. My code so far is able to do everything except align (justify) the text:
Code:
#include <stdio.h>
#include "catalin.h"
/*
 * Read "asimov.in", turn every newline into a space, collapse runs of
 * spaces with blankremove(), wrap the result at 80 columns with wrap()
 * and write it to "out.out".
 *
 * Returns 0 on success and 1 when a file cannot be opened or written.
 */
int main()
{
    /* static: together the buffers need about 1.6 MB, which is more than
       some platforms give a thread's stack by default */
    static char text[145000], blank[1450000];
    FILE *input, *output;
    int c;          /* int, not char: fgetc() returns EOF as an int */
    int f = 80;     /* line width handed to wrap() */
    size_t i = 0;

    input = fopen("asimov.in", "r");
    if (input == NULL) {
        perror("asimov.in");
        return 1;
    }
    /* stop one short of the buffer size so the terminator always fits;
       input beyond that is not read */
    while (i < sizeof text - 1 && (c = fgetc(input)) != EOF) {
        if (c == '\n') {
            c = ' ';
        }
        text[i] = (char)c;
        i++;
    }
    text[i] = '\0';     /* blankremove() scans until the terminator */
    fclose(input);

    blankremove(text, blank);
    wrap(blank, f);

    output = fopen("out.out", "w");
    if (output == NULL) {
        perror("out.out");
        return 1;
    }
    /* fputs, not fprintf(output, blank): a '%' in the text must not be
       interpreted as a conversion specification */
    fputs(blank, output);
    if (fclose(output) == EOF) {
        perror("out.out");
        return 1;
    }
    return 0;
}
/*
 * Copy text into blank, replacing every run of consecutive spaces with a
 * single space. Leading and trailing runs are reduced to one space too,
 * they are not removed.
 *
 * text  - zero terminated input, not modified
 * blank - output buffer, must be able to hold the whole of text plus the
 *         terminator
 *
 * Returns the number of characters written to blank, not counting the
 * terminator.
 */
int blankremove(char text[], char blank[])
{
    int src = 0, dst = 0;

    while (text[src] != '\0') {
        blank[dst] = text[src];
        dst++;
        if (text[src] == ' ') {
            /* one space was just copied: skip the rest of the run */
            while (text[src] == ' ') {
                src++;
            }
        } else {
            src++;
        }
    }
    blank[dst] = '\0';
    return dst;
}
/*
 * Break the text in s into lines of at most wrapline characters by
 * replacing, in place, the last space that still fits on each line with
 * a newline. Words are never split: a word longer than wrapline stays on
 * a line of its own and that line is longer than wrapline.
 *
 * A space at index 0 is never turned into a newline.
 *
 * After wrapping, a ruler made of wrapline spaces followed by '|' is
 * printed to stdout to show where the right margin is.
 */
void wrap(char s[], const int wrapline)
{
    int i;
    int line_start = 0;     /* index of the first character of the current line */
    int last_space = -1;    /* index of the most recent space seen, -1 for none */

    for (i = 0; s[i] != '\0'; ++i) {
        if (s[i] == ' ' && i > 0) {
            last_space = i;
        }
        /* Only a space that belongs to the current line may be used.
           Without the last_space >= line_start test a long word would
           break an earlier, already finished line. */
        if (i - line_start >= wrapline && last_space >= line_start) {
            s[last_space] = '\n';
            line_start = last_space + 1;
        }
    }

    for (i = 0; i < wrapline; ++i) {
        printf(" ");
    }
    printf("|\n");
}
All I need is some help creating a function that justifies the text. "justified—text is aligned along the left margin, and letter- and word-spacing is adjusted so that the text falls flush with both margins, also known as fully justified or full justification;" The spaces added during justification should be distributed uniformly between the words. No libraries other than the standard ones should be used.
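Using fscanf with the %s conversion
will read one word at a time and skip the whitespace between words.
Then add words to a line for as long as the line stays within 80 characters.
Finally, add extra spaces between the words so that the line also reaches the right margin.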
#include <stdio.h>
//hand written string helpers. the definitions follow main
int len ( char *str);//number of characters before the terminating zero
char *cat ( char *to, char *from);//append from to the end of to. returns to
char *cpy ( char *to, char *from);//replace the content of to with from. returns to
char *lastchr ( char *str, int ch);//pointer to the last ch in str or NULL
char *justify ( char *str, int wordcount, int width);//widen the gaps in str in place. returns str
//write one finished line to pfout
//line holds wordcount words, each followed by a single space
static void flushline ( FILE *pfout, char *line, int wordcount) {
    if ( wordcount > 1) {
        justify ( line, wordcount, 80);
    }
    else {//a single word has no gap to widen. only remove its trailing space
        int last = len ( line) - 1;
        if ( last >= 0 && ' ' == line[last]) {
            line[last] = 0;
        }
    }
    fprintf ( pfout, "%s\n", line);
}

//read whitespace separated words from asimov.in, pack as many as fit into
//lines of 80 characters, justify each line and write it to out.out
//returns 0 on success and 1 when a file can not be opened or written
int main( void) {
    char word[100] = "";
    char line[100] = "";//at most 80 characters, a trailing space and the terminator
    char filenamein[] = "asimov.in";
    char filenameout[] = "out.out";
    int length = 0;
    int wordcount = 0;//words in line. zero means there is nothing to print
    FILE *pfin = NULL;
    FILE *pfout = NULL;

    if ( NULL == ( pfin = fopen ( filenamein, "r"))) {
        perror ( filenamein);
        return 1;
    }
    if ( NULL == ( pfout = fopen ( filenameout, "w"))) {
        fclose ( pfin);
        perror ( filenameout);
        return 1;
    }
    while ( 1 == fscanf ( pfin, "%99s", word)) {//read a word from file. will exclude whitespace
        length = len ( word);
        if ( 80 <= length) {//word fills or exceeds a whole line. print it by itself
            if ( wordcount) {
                flushline ( pfout, line, wordcount);
                line[0] = 0;
                wordcount = 0;
            }
            fprintf ( pfout, "%s\n", word);
            continue;
        }
        //len ( line) counts the space after the last word so the sum
        //is the length of the line with the new word added
        if ( wordcount && 80 < len ( line) + length) {//word does not fit
            flushline ( pfout, line, wordcount);
            line[0] = 0;
            wordcount = 0;
        }
        cat ( line, word);
        cat ( line, " ");
        wordcount++;
    }
    if ( wordcount) {//words left over after the last read
        flushline ( pfout, line, wordcount);
    }
    fclose ( pfin);
    if ( EOF == fclose ( pfout)) {//closing flushes. a failure means the output is incomplete
        perror ( filenameout);
        return 1;
    }
    return 0;
}
//count the characters in str that come before the terminating zero
int len ( char *str) {
    int count = 0;
    for ( count = 0; 0 != str[count]; count++) {
        //nothing to do. the loop header does the counting
    }
    return count;
}
//append the characters of from to the end of to and terminate the result
//to must have room for both strings. returns to
char *cat ( char *to, char *from) {
    int at = 0;
    while ( 0 != to[at]) {//find the terminating zero of to
        at++;
    }
    for ( int each = 0; 0 != from[each]; each++) {
        to[at] = from[each];
        at++;
    }
    to[at] = 0;
    return to;
}
//replace the content of to with a copy of from. returns to
char *cpy ( char *to, char *from) {
    char *out = to;
    *out = 0;//to is emptied before anything is read from from
    for ( ; 0 != *from; from++) {
        *out = *from;
        out++;
    }
    *out = 0;//terminate
    return to;
}
//find the last character in str that equals ch
//returns a pointer to it or NULL when there is no match
//the terminating zero itself is never compared
char *lastchr ( char *str, int ch) {
    char *match = NULL;
    for ( char *at = str; 0 != *at; at++) {
        if ( *at == ch) {
            match = at;//later matches replace earlier ones
        }
    }
    return match;
}
//justify the line in str in place so that it is exactly width characters long
//trailing spaces are removed first. the missing spaces are then shared
//between the gaps. every gap gets the same amount and the gaps on the right
//get one more when the amount does not divide evenly
//str is left unchanged, apart from the removed trailing spaces, when it has
//no gap or is already width characters or longer
//str must have room for width + 1 characters
//wordcount is kept for the callers. the gaps are counted from the text itself
//returns str
char *justify ( char *str, int wordcount, int width) {
    int length = 0;
    int gaps = 0;
    int addspaces = 0;
    int extraspace = 0;
    int leftover = 0;
    (void) wordcount;

    while ( str[length]) {//find the terminating zero
        length++;
    }
    while ( length > 0 && ' ' == str[length - 1]) {//drop trailing spaces
        length--;
    }
    str[length] = 0;
    for ( int each = 0; each < length; ++each) {
        if ( ' ' == str[each]) {
            gaps++;
        }
    }
    addspaces = width - length;//number of spaces needed
    if ( addspaces <= 0 || 0 == gaps) {
        return str;//nothing to add or no place to add it
    }
    extraspace = addspaces / gaps;//extra spaces for every gap
    leftover = addspaces % gaps;//gaps that get one more
    int from = length - 1;//last character of the text
    int to = width - 1;//where that character has to end up
    str[width] = 0;
    //work from the right so no character is overwritten before it is moved
    //to - from is the number of spaces still to add. the loop ends at zero
    while ( from < to) {
        if ( ' ' == str[from]) {//found a gap
            int pad = 1 + extraspace;//the space itself plus its share
            if ( leftover) {
                pad++;
                leftover--;
            }
            while ( pad) {
                str[to] = ' ';
                to--;
                pad--;
            }
        }
        else {
            str[to] = str[from];//shift character toward end
            to--;
        }
        from--;
    }
    return str;
}
EDIT:
#include <stdio.h>
//WIDTH is the line width passed to justify and the size passed to scanword
#define WIDTH 80
//SIZE is the size of the word and line buffers in main
#define SIZE ( WIDTH + 20)
//hand written helpers. the definitions follow main
int len ( char *str);//number of characters before the terminating zero
char *cat ( char *to, char *from);//append from to the end of to. returns to
char *cpy ( char *to, char *from);//replace the content of to with from. returns to
char *lastchr ( char *str, int ch);//pointer to the last ch in str or NULL
char *justify ( char *str, int wordcount, int width);//widen the gaps in str in place. returns str
int scanword ( FILE *pfread, int size, char *word);//read the next word from pfread into word. returns 1 or 0
//write one finished line to pfout
//line holds wordcount words, each followed by a single space
static void flushline ( FILE *pfout, char *line, int wordcount) {
    if ( wordcount > 1) {
        justify ( line, wordcount, WIDTH);
    }
    else {//a single word has no gap to widen. only remove its trailing space
        int last = len ( line) - 1;
        if ( last >= 0 && ' ' == line[last]) {
            line[last] = 0;
        }
    }
    fprintf ( pfout, "%s\n", line);
}

//read words from asimov.in with scanword, pack as many as fit into lines of
//WIDTH characters, justify each line and write it to out.out
//a word that is a single newline marks a paragraph. the line collected so
//far is written and followed by a blank line
//returns 0 on success and 1 when a file can not be opened or written
int main( void) {
    char word[SIZE] = "";
    char line[SIZE] = "";//at most WIDTH characters, a trailing space and the terminator
    char filenamein[] = "asimov.in";
    char filenameout[] = "out.out";
    int length = 0;
    int wordcount = 0;//words in line. zero means there is nothing to print
    FILE *pfin = NULL;
    FILE *pfout = NULL;

    if ( NULL == ( pfin = fopen ( filenamein, "r"))) {
        perror ( filenamein);
        return 1;
    }
    if ( NULL == ( pfout = fopen ( filenameout, "w"))) {
        fclose ( pfin);
        perror ( filenameout);
        return 1;
    }
    while ( 1 == scanword ( pfin, WIDTH, word)) {//read a word from file
        if ( 0 == word[0]) {//nothing was read before a newline
            continue;
        }
        if ( '\n' == word[0]) {//paragraph
            if ( wordcount) {
                flushline ( pfout, line, wordcount);
                line[0] = 0;
                wordcount = 0;
            }
            fprintf ( pfout, "\n");//print a blank line for paragraph
            continue;
        }
        length = len ( word);
        //len ( line) counts the space after the last word so the sum
        //is the length of the line with the new word added
        if ( wordcount && WIDTH < len ( line) + length) {//word does not fit
            flushline ( pfout, line, wordcount);
            line[0] = 0;
            wordcount = 0;
        }
        cat ( line, word);
        cat ( line, " ");
        wordcount++;
    }
    if ( wordcount) {//words left over after the last read
        flushline ( pfout, line, wordcount);
    }
    fclose ( pfin);
    if ( EOF == fclose ( pfout)) {//closing flushes. a failure means the output is incomplete
        perror ( filenameout);
        return 1;
    }
    return 0;
}
//read the next word from pfread into word
//size is the size of word. at most size - 1 characters are stored
//space, tab and zero bytes separate words and are never stored
//a newline ends the current word. word may then be empty
//a newline that directly follows a newline stores "\n" in word to mark a paragraph
//a word longer than size - 1 characters is handed over in pieces,
//one piece for each call
//returns 1 when word should be processed and 0 at the end of the file
//or when size is too small to hold any character
//the newline state is kept in a static variable, so only one stream
//should be read at a time
int scanword ( FILE *pfread, int size, char *word) {
    static int nl = 0;//static to retain value between function calls
    int ch = 0;
    int max = size - 1;//max characters that can fit in word and leave one to terminate
    *word = 0;//first character. zero terminate. empty line
    if ( max < 1) {
        return 0;//no room for any character
    }
    while ( max) {//read a character until max is zero
        ch = fgetc ( pfread);
        if ( EOF == ch) {//end of file
            if ( max == size - 1) {
                return 0;//no other characters read
            }
            return 1;//process the other characters that were read
        }
        if ( '\n' == ch) {//read a newline
            if ( '\n' == nl) {//consecutive newlines
                *word = nl;
                word++;
                *word = 0;
                return 1;
            }
            nl = ch;//set for first single newline
            return 1;
        }
        nl = 0;//reset to zero as prior character was not newline
        if ( ' ' == ch || '\t' == ch || 0 == ch) {//read a separator
            if ( max == size - 1) {//no characters in word so far
                continue;//consume leading separators
            }
            return 1;//process the word read
        }
        *word = ch;//assign character to word
        word++;//increment pointer to next character
        *word = 0;//zero terminate
        max--;//deduct. one less character can be read into word
    }
    return 1;//word is full. the rest of a longer word follows on the next call
}
//return how many characters str holds, not counting the terminating zero
int len ( char *str) {
    int count = 0;
    for ( char *at = str; 0 != *at; ++at) {//walk to the terminating zero
        ++count;
    }
    return count;
}
//append a copy of from to the end of to. to must have room for the result
//returns to
char *cat ( char *to, char *from) {
    int end = 0;
    int each = 0;
    for ( end = 0; 0 != to[end]; ++end) {
        //skip over the characters already in to
    }
    for ( each = 0; 0 != from[each]; ++each) {
        to[end + each] = from[each];
    }
    to[end + each] = 0;//terminate
    return to;
}
//overwrite to with a copy of from. returns to
char *cpy ( char *to, char *from) {
    int at = 0;
    to[0] = 0;//to is emptied before anything is read from from
    while ( 0 != from[at]) {
        to[at] = from[at];
        at++;
    }
    to[at] = 0;//terminate
    return to;
}
//search str for ch and return a pointer to its last occurrence
//returns NULL when ch does not occur. the terminating zero is not searched
char *lastchr ( char *str, int ch) {
    char *found = NULL;
    int at = 0;
    while ( 0 != str[at]) {
        found = ( ch == str[at]) ? &str[at] : found;//keep the latest match
        at++;
    }
    return found;
}
//justify the line in str in place so that it is exactly width characters long
//trailing spaces are removed first. the missing spaces are then placed
//uniformly. every gap gets the same amount and the gaps on the right get
//one more when the amount does not divide evenly, so two gaps never differ
//by more than one space
//str is left unchanged, apart from the removed trailing spaces, when it has
//no gap or is already width characters or longer
//str must have room for width + 1 characters
//wordcount is kept for the callers. the gaps are counted from the text itself
//returns str
char *justify ( char *str, int wordcount, int width) {
    int length = 0;
    int gaps = 0;
    int addspaces = 0;
    int extraspace = 0;
    int leftover = 0;
    (void) wordcount;

    while ( str[length]) {//character is not terminating zero
        length++;
    }
    while ( length > 0 && ' ' == str[length - 1]) {//drop trailing spaces
        length--;
    }
    str[length] = 0;//terminate the line after the last word
    for ( int each = 0; each < length; ++each) {
        if ( ' ' == str[each]) {
            gaps++;
        }
    }
    addspaces = width - length;//difference is number of spaces needed
    if ( addspaces <= 0 || 0 == gaps) {
        return str;//nothing to add or no place to add it
    }
    extraspace = addspaces / gaps;//extra spaces for every gap
    leftover = addspaces % gaps;//number of gaps that get one more
    int from = length - 1;//index of the last character of the text
    int to = width - 1;//index where that character has to end up
    str[width] = 0;
    //work from the right so no character is overwritten before it is moved
    //to - from is the number of spaces still to add. the loop ends at zero
    while ( from < to) {
        if ( ' ' == str[from]) {//found a gap
            int pad = 1 + extraspace;//the space itself plus its share
            if ( leftover) {
                pad++;
                leftover--;
            }
            while ( pad) {
                str[to] = ' ';
                to--;
                pad--;
            }
        }
        else {
            str[to] = str[from];//shift character toward end
            to--;
        }
        from--;
    }
    return str;
}