为什么这个文件读取代码在文件末尾给出垃圾?

Why does this file reading code give junk at the end of the file?

我写了这段 Swift 代码,它使用 Glibc 将文本文件加载到字符串中,但有时它会在字符串末尾附加 \U{7F} 等垃圾字符(在同一个文件),我不知道为什么。 None 来自 Glibc 函数的错误检查会引发任何警告。它只是随机失败,即使在同一个文件上使用也是如此。

public typealias Cpath    = String
public typealias Unixpath = String
public typealias CString = UnsafeMutablePointer<CChar>

public func unix_path(_ path:Cpath) -> Unixpath
{
    guard path.characters.count > 1 
    else {
        return path
    }
    let path_i0 = path.startIndex
    let path_i2 = path.index(path_i0, offsetBy: 2)
    var expanded_path:Unixpath = path
    if path[path.startIndex..<path_i2] == "~/" {
        expanded_path = String(cString: getenv("HOME")) + 
                        path[path.index(path_i0, offsetBy: 1)..<path.endIndex]
    }
    return expanded_path
}

public func open_text_file(_ path:Cpath) -> String?
{
    let path = unix_path(path)

    guard let f:UnsafeMutablePointer<FILE> = fopen(path, "rb") 
    else {
        print("Error, could not open file '\(path)'")
        return nil
    }
    defer { fclose(f) }

    let fseek_status = fseeko(f, 0, SEEK_END)
    guard fseek_status == 0 
    else {
        print("Error, fseeko() failed with error code \(fseek_status)")
        return nil
    }

    let n = ftello(f)
    guard 0..<CLong.max ~= n
    else {
        print("Error, ftello() returned file size outsize of allowed range")
        return nil
    }
    rewind(f)

    guard let raw_buffer:UnsafeMutableRawPointer = malloc(n*MemoryLayout<CChar>.size)
    else {
        print("Error, could not allocate memory buffer")
        return nil
    }
    defer { free(raw_buffer) }

    let n_read = fread(raw_buffer, MemoryLayout<CChar>.size, n, f)
    guard n_read == n
    else {
        print("Error, fread() read \(n_read) characters out of \(n)")
        return nil
    }
    let cchar_buffer:CString = raw_buffer.assumingMemoryBound(to: CChar.self)
    return String(cString: cchar_buffer)
}

我发现了问题——我忽略了应该出现在 C 字符串末尾的标记 (\U{00})。没有它,String(cString:) 构造函数只会清理所有内容,直到它在系统内存中找到一个自然发生的 x00 字节。这是正确的形式:

...

// n + 1 to leave room for sentinel
let cchar_buffer:CString = CString.allocate(capacity: n + 1) 
defer { cchar_buffer.deallocate(capacity: n + 1) }

let n_read = fread(cchar_buffer, MemoryLayout<CChar>.size, n, f)
guard n_read == n
else {
    print("Error, fread() read \(n_read) characters out of \(n)")
    return nil
}
cchar_buffer[n] = 0 // cap with sentinel
return String(cString: cchar_buffer)