Linux 中的 C 编程：无法为查找文件中子字符串出现次数的程序获得正确的输出

Question

我正在编写一个程序，用于查找写入缓冲区的文本文件（也从命令行读取）中命令行输入子字符串的出现次数。

当我运行 bash 中的代码时，我收到错误：分段错误（核心已转储）。我仍在学习如何在这种环境中使用 C 进行编码，并且对为什么会发生分段错误（滥用动态内存分配？）有一些想法，但我找不到它的问题。我所能得出的结论是问题出在 for 循环内（我标记了代码中可能导致错误的位置）。

编辑：我设法通过将 argv[j] 更改为 argv[i] 来修复分段错误，但是当我现在运行代码时，count1 总是 returns 0 甚至如果子字符串在文本文件中多次出现，即使我已经多次检查代码，我也不确定哪里出了问题。

$ more foo.txt

aabbccc

$ ./main foo.txt a

0

#include <sys/types.h>
#include <sys/uio.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <string.h>

int main(int argc, char *argv[]) {

    FILE *fp; 
    long lsize; 
    char *buf;
    int count = 0, count1 = 0; 
    int i, j, k, l1, l2;   

    if (argc < 3) { printf("Error: insufficient arguments.\n"); return(1); };

    fp = fopen(argv[1], "r"); 

    if (!fp) { 
        perror(argv[1]); 
        exit(1); 
    }

    //get size of file 
    fseek(fp, 0L, SEEK_END);
    lsize = ftell(fp); 
    rewind(fp);

    //allocate memory for entire content
    buf = calloc(1, lsize+1);

    if (!buf) { 
        fclose(fp); 
        fputs("Memory alloc fails.\n", stderr); 
        exit(1); 
    }

    //copy the file into the buffer
    if (1 != fread(buf, lsize, 1, fp)) {
        fclose(fp);
        free(buf); 
        fputs("Entire read fails.\n", stderr); 
        exit(1); 
    }

    l1 = strlen(buf);

    //error is somewhere here
    for (i = 2; i < argc; i++) {
        for (j = 0; j < l1;) {
            k = 0; 
            count = 0; 
            while ((&buf[j] == argv[k])) {
                count++;
                j++; 
                k++; 
            }
            if (count == strlen(argv[j])) {
                count1++; 
                count = 0; 
            }
            else
                j++; 
        }
        printf("%d\n", count1);
    }

    fclose(fp); 

    return 0; 
}

Answer 1

fread(buf, lsize, 1, fp) 将读取 1 个 lsize 字节块，但是 fread 不关心内容，不会为字符串，所以 l1 = strlen(buf); 会产生未定义的行为，其余的因此可以忽略结果（并且您的计数也有错误）。请注意，文件末尾通常没有 0 终止字节，这甚至适用于包含文本的文件，它们通常以换行符。

您必须自己设置以 0 结束的字节：

if (1 != fread(buf, lsize, 1, fp)) {
    fclose(fp);
    free(buf); 
    fputs("Entire read fails.\n", stderr); 
    exit(1); 
}

buf[lsize] = '0';

并且你可以使用strstr来获取子串的位置，像这样：

for(i = 2; i < argc; ++i)
{
    char *content = buf;
    int count = 0;

    while((content = strstr(content, argv[i])))
    {
        count++;
        content++; // point to the next char in the substring
    }

    printf("The substring '%s' appears %d time(s)\n", argv[i], count);

}

您的计数有误，有些错误。这个比较

&buf[j] == argv[k]

错了，你比较的是指针，不是内容。你必须使用 strcmp 比较字符串。在这种情况下，您将不得不使用 strncmp 因为您只想匹配子字符串：

while(strncmp(&buf[j], argv[k], strlen(argv[k])) == 0)
{
    // substring matched
}

但这也是错误的，因为你也在递增 k，这将给你下一个论点，最后你可能会读到 beyond the limits of argv 如果子字符串长于参数个数。根据您的代码，你必须比较字符：

while(buf[j] == argv[i][k])
{
    j++;
    k++;
}

只有当子字符串匹配时，您才需要递增 counter，例如这个：

l1 = strlen(buf);

for (i = 2; i < argc; i++) {
    int count = 0;
    int k = 0; // running index for inspecting argv[i]
    for (j = 0; j < l1; ++j) {
        while(buf[j + k] == argv[i][k])
            k++;

        // if all characters of argv[i] 
        // matched, argv[i][k] will be the
        // 0-terminating byte
        if(argv[i][k] == 0)
            count++;

        // reset running index for argv[i]
        // go to next char if buf
        k = 0;
    }

    printf("The substring '%s' appears %d time(s)\n", argv[i], count);
}

Linux 中的 C 编程：无法为查找文件中子字符串出现次数的程序获得正确的输出

C programming in Linux: not getting correct output for program that finds number of occurrences of substring in file

c

linux

segmentation-fault

dynamic-memory-allocation