如何删除C中的文本?

How to delete Text in C?

基本上就是这个问题。我得到了一个巨大的文件。一个文本,有很多空格。我必须编写一个程序来删除空格,创建恰好 80 个字符长的行而不拆分任何单词,并且它会同时将文本左右对齐(对齐文本);通过在单词之间放置额外的空格来对齐文本,这样该行将以单词结尾并以单词开头,正好 80 个字符长。

是的,这是作业,但我可以获取任何类型的在线帮助。到目前为止,我的代码能够完成除对齐文本(对齐)之外的所有操作:

代码:

#include <stdio.h>
#include "catalin.h"

/*
 * Read asimov.in, turn newlines into spaces, collapse repeated blanks,
 * wrap the text at 80 columns and write the result to out.out.
 * Returns 0 on success and 1 when a file cannot be opened.
 */
int main()
{
   /* static: roughly 1.6 MB of buffers is too much to place on the stack */
   static char text[145000], blank[1450000];
   FILE *input, *output;
   int c;                /* int, not char, so that EOF can be told apart from data */
   int f=80,i=0,l;

   input = fopen("asimov.in", "r");
   if (input == NULL) {
      perror("asimov.in");
      return 1;
   }
   /* stop one short of the buffer size so the terminator always fits */
   while (i < (int)sizeof text - 1 && (c=fgetc(input))!=EOF){
      if (c=='\n') c=' ';
      text[i]=(char)c;
      i++;
   }
   text[i]='\0';         /* blankremove() scans until the terminator */
   fclose(input);

   blankremove(text,blank);
   wrap(blank,f);
   l=lenght(blank);      /* lenght() is not defined here (presumably in catalin.h); result unused so far */
   (void)l;

   output = fopen("out.out", "w");
   if (output == NULL) {
      perror("out.out");
      return 1;
   }
   /* fputs, not fprintf(output, blank): a '%' in the text must not be read as a format */
   fputs(blank, output);
   fclose(output);
   return 0;
}

/*
 * Copy text into blank, collapsing every run of consecutive spaces into a
 * single space. Only ' ' is treated as blank; other characters are copied
 * unchanged.
 *
 * text  - NUL-terminated input string
 * blank - output buffer; must hold at least as many bytes as text,
 *         terminator included
 *
 * Returns the length of the string written to blank (terminator excluded).
 */
int blankremove(char text[], char blank[])
{
   int src = 0, dst = 0;

   while (text[src] != '\0') {
      /* drop a space when the character just written is already a space */
      if (!(text[src] == ' ' && dst > 0 && blank[dst - 1] == ' ')) {
         blank[dst] = text[src];
         dst++;
      }
      src++;
   }
   blank[dst] = '\0';
   return dst;
}

/*
 * Word-wrap s in place: selected spaces are replaced by '\n' so that each
 * line holds at most wrapline characters where a break is possible.
 *
 * The search for a break never looks before the start of the current line,
 * so lines that were already wrapped are not modified again. A word longer
 * than wrapline cannot be split; it stays on one overlong line, which ends
 * at the first space after the word.
 *
 * After wrapping, a ruler (wrapline spaces followed by '|') is printed to
 * stdout.
 *
 * s        - NUL-terminated text, modified in place
 * wrapline - maximum line width in characters
 */
void wrap(char s[], const int wrapline)
{
    int i, k;
    int lastwrap = 0;   /* index of the first character of the current line */

    for (i = 0; s[i] != '\0'; ++i) {
        int used = i - lastwrap;   /* characters on the current line before s[i] */
        int brk = -1;              /* index of the space to turn into '\n' */

        if (used < wrapline) {
            continue;
        }
        if (used == wrapline) {
            /* line just became full: break at the last space inside it */
            for (k = i; k > lastwrap; --k) {
                if (s[k] == ' ') {
                    brk = k;
                    break;
                }
            }
        } else if (s[i] == ' ') {
            /* overlong word: the window held no space, so break right after the word */
            brk = i;
        }
        if (brk >= 0) {
            s[brk] = '\n';
            lastwrap = brk + 1;
        }
    }
    for (i = 0; i < wrapline; ++i) printf(" ");
    printf("|\n");
}

我只需要一些帮助来创建一个对文本进行两端对齐(justify)的函数。 "justified—text is aligned along the left margin, and letter- and word-spacing is adjusted so that the text falls flush with both margins, also known as fully justified or full justification;" 两端对齐时添加的空格要均匀分布。除标准库外,不应使用任何其他库。

忽略现有代码中的许多错误,您需要考虑要实现的目标。

考虑一个更简单的例子。假设您的源文本是 "Hello world",并且要把它对齐到宽度 15。"Hello world" 的长度为 11 个字符,比需要的少 4 个。字符串中只有 1 个空格,所以您需要把这个空格变成 5 个空格,这样它就变成 "Hello     world"。

下一个示例:"I like bees!" —— 共 12 个字符,其中有 2 个空格,还需要额外补 3 个空格。为了填满 15 个字符,其中一个空格要变成 2 个空格,另一个要变成 3 个空格。

因此您的代码首先需要计算您当前正在使用的行中有多少 space。您可以在确定在哪里换行时执行此操作,并且如果您跟踪最后一个 space 的位置,则不需要回溯以再次找到它。

其次,知道需要填充多少个额外的字符。

最后找到行内的 space 并在其中平均添加额外的 space。此时您最好使用新字符串,因为虽然可以将 space 插入 s,但它很复杂并且更有可能引入更多错误。

使用 fscanf 将读取单词并排除空格。
然后当行的长度小于80时添加单词。
添加额外的空格以右对齐该行。

#include <stdio.h>

int len ( char *str);
char *cat ( char *to, char *from);
char *cpy ( char *to, char *from);
char *lastchr ( char *str, int ch);
char *justify ( char *str, int wordcount, int width);

/*
 * Read whitespace-separated words from asimov.in, pack them into lines of
 * at most 80 characters, justify each line and write it to out.out.
 * Returns 0 on success and 1 when a file cannot be opened or written.
 */
int main( void) {
    enum { LINE_WIDTH = 80 };
    char word[100] = "";
    char line[sizeof word + 2] = "";//longest word fscanf can return, plus a space and the terminator
    char filenamein[] = "asimov.in";
    char filenameout[] = "out.out";
    int length = 0;
    int wordcount = 0;//words currently held in line
    FILE *pfin = NULL;
    FILE *pfout = NULL;
    if ( NULL == ( pfin = fopen ( filenamein, "r"))) {
        perror ( filenamein);
        return 1;
    }
    if ( NULL == ( pfout = fopen ( filenameout, "w"))) {
        fclose ( pfin);
        perror ( filenameout);
        return 1;
    }

    for ( ;;) {
        int got = ( 1 == fscanf ( pfin, "%99s", word));//read a word from file. will exclude whitespace
        int linelen = len ( line);//includes the trailing space after the last word

        length = got ? len ( word) : 0;
        //flush the line at end of input or when the next word would not fit.
        //linelen + length is the text width with the new word appended
        if ( wordcount > 0 && ( ! got || LINE_WIDTH < linelen + length)) {
            if ( LINE_WIDTH < linelen - 1) {
                line[linelen - 1] = 0;//single overlong word: nothing to justify, only drop the trailing space
            }
            else {
                justify ( line, wordcount, LINE_WIDTH);
            }
            fprintf ( pfout, "%s\n", line);
            line[0] = 0;
            wordcount = 0;
        }
        if ( ! got) {
            break;
        }
        //the word always goes into the (possibly just emptied) line, so it cannot be lost at end of input
        cat ( line, word);
        cat ( line, " ");
        wordcount++;//needed in case more than one extra space per word
    }

    fclose ( pfin);
    if ( 0 != fclose ( pfout)) {//fclose flushes; a failure here means the output is incomplete
        perror ( filenameout);
        return 1;
    }

    return 0;
}

/* Return the number of characters in str before the terminating zero. */
int len ( char *str) {
    char *cursor = str;

    for ( ; *cursor; ++cursor) {
        //advance until the terminating zero
    }
    return ( int) ( cursor - str);
}

/*
 * Append the string from to the end of the string to.
 * to must have room for both strings plus the terminating zero.
 * Returns to.
 */
char *cat ( char *to, char *from) {
    char *dest = to;

    for ( ; *dest; ++dest) {
        //find the terminating zero of to
    }
    do {
        *dest++ = *from;//copy each character, the terminating zero included
    } while ( *from++);
    return to;
}

/* Copy the string from into to by emptying to and appending from. Returns to. */
char *cpy ( char *to, char *from) {
    to[0] = '\0';//an empty destination turns the append into a copy
    return cat ( to, from);//cat returns its first argument
}

/*
 * Find the last occurrence of ch in str.
 * Returns a pointer to it, or NULL when ch does not occur. The terminating
 * zero itself is never matched.
 */
char *lastchr ( char *str, int ch) {
    char *match = NULL;

    for ( char *scan = str; *scan; ++scan) {
        match = ( ch == *scan) ? scan : match;//remember the most recent match
    }
    return match;
}

/*
 * Fully justify the line in str, in place, to exactly width characters.
 *
 * Trailing spaces are removed first. The missing width is then spread over
 * the spaces between words as evenly as possible; when it does not divide
 * evenly, the leftmost gaps receive one extra space each. A line with no
 * gap (a single word or an empty line), or one already at least width
 * characters long, is only trimmed.
 *
 * str       - line to justify; the buffer must hold at least width + 1 bytes
 * wordcount - kept for interface compatibility; the gaps are counted from
 *             the text itself
 * width     - target line width in characters
 *
 * Returns str.
 */
char *justify ( char *str, int wordcount, int width) {
    int length = 0;
    int gaps = 0;

    ( void) wordcount;
    if ( NULL == str) {
        return str;
    }
    while ( str[length]) {//find the terminating zero
        length++;
    }
    while ( length > 0 && ' ' == str[length - 1]) {//drop trailing spaces
        length--;
    }
    str[length] = 0;
    for ( int each = 0; each < length; ++each) {
        if ( ' ' == str[each]) {
            gaps++;
        }
    }

    int addspaces = width - length;//number of spaces that must be inserted
    if ( 0 == gaps || addspaces <= 0) {
        return str;//nothing to stretch, or nowhere to put the spaces
    }
    int extraspace = addspaces / gaps;//every gap receives this many
    int leftover = addspaces % gaps;//the leftmost gaps receive one more

    //shift from the right end so that no character is overwritten before it is read
    int from = length - 1;
    int to = width - 1;
    int gap = gaps;//number of the gap from the left; the rightmost is reached first
    str[width] = 0;
    while ( from >= 0) {
        str[to] = str[from];
        to--;
        if ( ' ' == str[from]) {
            int pad = extraspace + ( gap <= leftover ? 1 : 0);
            while ( pad > 0) {
                str[to] = ' ';
                to--;
                pad--;
            }
            gap--;
        }
        from--;
    }
    return str;
}

编辑:

#include <stdio.h>

#define WIDTH 80
#define SIZE ( WIDTH + 20)

int len ( char *str);
char *cat ( char *to, char *from);
char *cpy ( char *to, char *from);
char *lastchr ( char *str, int ch);
char *justify ( char *str, int wordcount, int width);
int scanword ( FILE *pfread, int size, char *word);

/*
 * Read words from asimov.in with scanword, pack them into lines of at most
 * WIDTH characters, justify each line and write it to out.out. Consecutive
 * newlines in the input end the current line and produce a blank line.
 * Returns 0 on success and 1 when a file cannot be opened or written.
 */
int main( void) {
    char word[SIZE] = "";
    char line[SIZE] = "";
    char filenamein[] = "asimov.in";
    char filenameout[] = "out.out";
    int length = 0;
    int wordcount = 0;//words currently held in line
    FILE *pfin = NULL;
    FILE *pfout = NULL;
    if ( NULL == ( pfin = fopen ( filenamein, "r"))) {
        perror ( filenamein);
        return 1;
    }
    if ( NULL == ( pfout = fopen ( filenameout, "w"))) {
        fclose ( pfin);
        perror ( filenameout);
        return 1;
    }

    while ( 1 == scanword ( pfin, WIDTH, word)) {//read a word from file
        int newpara = ( '\n' == word[0]);//scanword reports consecutive newlines as the word "\n"

        length = len ( word);
        if ( ! newpara && 0 == length) {
            continue;//a single newline yields an empty word. nothing to add
        }
        //len ( line) includes the trailing space, so the sum is the text width with word appended
        if ( newpara || WIDTH < len ( line) + length) {//paragraph or word does not fit
            if ( wordcount > 0) {//line has content
                justify ( line, wordcount, WIDTH);
                fprintf ( pfout, "%s\n", line);
            }
            if ( newpara) {
                fprintf ( pfout, "\n");//print a blank line for paragraph
            }
            line[0] = 0;
            wordcount = 0;//reset wordcount
        }
        if ( ! newpara) {
            //the word stays in line until the next flush, so it cannot be lost at end of input
            cat ( line, word);
            cat ( line, " ");
            wordcount++;//needed in case more than one extra space per word
        }
    }
    if ( wordcount > 0) {//print pending line
        justify ( line, wordcount, WIDTH);
        fprintf ( pfout, "%s\n", line);
    }

    fclose ( pfin);
    if ( 0 != fclose ( pfout)) {//fclose flushes; a failure here means the output is incomplete
        perror ( filenameout);
        return 1;
    }

    return 0;
}

/*
 * Read the next word from pfread into word.
 *
 * Leading spaces and tabs are skipped; a space, tab or newline ends the word.
 * A newline that directly follows another newline is reported as the word
 * "\n". A single newline ends the word collected so far, which may be empty.
 * A word longer than size - 1 characters is returned in pieces over
 * successive calls.
 *
 * pfread - stream to read from
 * size   - capacity of word in bytes, terminating zero included
 * word   - receives the zero-terminated word
 *
 * Returns 1 when a word (possibly empty) was produced, 0 at end of file with
 * nothing read, when a zero byte is read, or when size leaves no room for
 * any character.
 */
int scanword ( FILE *pfread, int size, char *word) {
    static int nl = 0;//static to retain value between function calls
    int ch = 0;
    int max = size - 1;//max characters that can fit in word and leave one to terminate

    *word = 0;//first character. zero terminate. empty line
    if ( max < 1) {
        return 0;//no room for even one character
    }
    while ( max && ( ch = fgetc ( pfread))) {//read a character until max is zero
        if ( EOF == ch) {//end of file
            if ( max == size - 1) {
                return 0;//no other characters read
            }
            return 1;//process the other characters that were read
        }
        if ( '\n' == ch) {//read a newline
            if ( '\n' == nl) {//consecutive newlines
                *word = nl;
                word++;
                *word = 0;
                return 1;
            }
            nl = ch;//set for first single newline
            return 1;
        }
        nl = 0;//reset to zero as prior character was not newline
        if ( ' ' == ch || '\t' == ch) {//read space or tab
            if ( max == size - 1) {//no characters in word so far
                continue;//consume leading space and tab
            }
            return 1;//process the word read
        }
        *word = ch;//assign character to word
        word++;//increment pointer to next character
        *word = 0;//zero terminate
        max--;//deduct. one less character can be read into word
    }
    if ( 0 == max) {
        return 1;//word is full. return this piece; the next call reads the rest
    }
    return 0;//a zero byte was read
}

/* Count the characters in str up to, but not including, the terminating zero. */
int len ( char *str) {
    int count = 0;

    for ( ; 0 != str[count]; ++count) {
        //index of the terminating zero equals the length
    }
    return count;
}

/*
 * Append the string from to the end of the string to.
 * to must have room for both strings plus the terminating zero.
 * Returns to.
 */
char *cat ( char *to, char *from) {
    int end = 0;
    int each = 0;

    while ( to[end]) {//locate the terminating zero of to
        end++;
    }
    for ( each = 0; from[each]; ++each) {//copy from behind the existing text
        to[end + each] = from[each];
    }
    to[end + each] = 0;//terminate
    return to;
}

/* Copy the string from into to: to is emptied, then from is appended. Returns to. */
char *cpy ( char *to, char *from) {
    to[0] = '\0';//start from an empty destination
    ( void) cat ( to, from);
    return to;
}

/*
 * Find the last occurrence of ch in str.
 * Returns a pointer to it, or NULL when ch does not occur. The terminating
 * zero itself is never matched.
 */
char *lastchr ( char *str, int ch) {
    char *tail = str;

    while ( *tail) {//move to the terminating zero
        tail++;
    }
    while ( tail != str) {//walk back; the first match met is the last in the string
        tail--;
        if ( ch == *tail) {
            return tail;
        }
    }
    return NULL;
}

/*
 * Fully justify the line in str, in place, to exactly width characters.
 *
 * Trailing spaces are removed first. The missing width is then spread over
 * the spaces between words as evenly as possible; when it does not divide
 * evenly, the leftmost gaps receive one extra space each. A line with no
 * gap (a single word or an empty line), or one already at least width
 * characters long, is only trimmed.
 *
 * str       - line to justify; the buffer must hold at least width + 1 bytes
 * wordcount - kept for interface compatibility; the gaps are counted from
 *             the text itself
 * width     - target line width in characters
 *
 * Returns str.
 */
char *justify ( char *str, int wordcount, int width) {
    int length = 0;
    int gaps = 0;

    ( void) wordcount;
    if ( NULL == str) {
        return str;
    }
    while ( str[length]) {//find the terminating zero
        length++;
    }
    while ( length > 0 && ' ' == str[length - 1]) {//drop trailing spaces
        length--;
    }
    str[length] = 0;//terminate the trimmed line
    for ( int each = 0; each < length; ++each) {
        if ( ' ' == str[each]) {//every remaining space is a gap between words
            gaps++;
        }
    }

    int addspaces = width - length;//difference is number of spaces needed
    if ( 0 == gaps || addspaces <= 0) {
        return str;//nothing to stretch, or nowhere to put the spaces
    }
    int extraspace = addspaces / gaps;//every gap receives this many
    int remainder = addspaces % gaps;//the leftmost gaps receive one more

    //shift characters toward the end, starting at the right,
    //so that no character is overwritten before it has been read
    char *space = str + length - 1;//last character of the trimmed line
    char *end = str + width - 1;//last character of the justified line
    int gap = gaps;//number of the gap from the left; the rightmost is reached first
    str[width] = 0;//terminate the justified line explicitly
    while ( space >= str) {
        *end = *space;
        end--;
        if ( ' ' == *space) {//found a gap
            for ( int pad = extraspace + ( gap <= remainder ? 1 : 0); pad > 0; --pad) {
                *end = ' ';
                end--;
            }
            gap--;
        }
        if ( space == str) {
            break;//reached the start of the line; do not step before the buffer
        }
        space--;
    }
    return str;
}