Swift:获取字符串的子串,忽略大小写和重音

Swift: get substring of string, ignoring case and accents

我想从字符串中提取一个子字符串,忽略重音和大小写。

例如,如果字符串是 Curaçao 并且输入子字符串 aca,它将把字符串拆分为三个子字符串:Cur(匹配前的子字符串),aça(匹配子串)和o(匹配后的剩余子串)。

我试过这段代码:

/// Integer-offset subscripts for `String`, plus a `highlight` helper that brackets a
/// case- and diacritic-insensitive match.
///
/// Offsets count `Character`s from `startIndex`. None of the subscripts check bounds:
/// every `index(_:offsetBy:)` call here is made without a limit.
extension String {
    /// The character at the given character offset.
    subscript(offset: Int) -> Character { self[index(startIndex, offsetBy: offset)] }
    /// The slice covering the half-open range of character offsets.
    subscript(range: Range<Int>) -> SubSequence {
        // Local `startIndex` shadows the property; it is the index of the first character of the slice.
        let startIndex = index(self.startIndex, offsetBy: range.lowerBound)
        return self[startIndex..<index(startIndex, offsetBy: range.count)]
    }
    /// The slice covering the closed range of character offsets.
    subscript(range: ClosedRange<Int>) -> SubSequence {
        let startIndex = index(self.startIndex, offsetBy: range.lowerBound)
        // `range.count` already includes the upper bound, so a half-open slice is correct here.
        return self[startIndex..<index(startIndex, offsetBy: range.count)]
    }
    /// The slice from the given character offset to the end.
    subscript(range: PartialRangeFrom<Int>) -> SubSequence { self[index(startIndex, offsetBy: range.lowerBound)...] }
    /// The slice from the start through the given character offset (inclusive).
    subscript(range: PartialRangeThrough<Int>) -> SubSequence { self[...index(startIndex, offsetBy: range.upperBound)] }
    /// The slice from the start up to, but not including, the given character offset.
    subscript(range: PartialRangeUpTo<Int>) -> SubSequence { self[..<index(startIndex, offsetBy: range.upperBound)] }
    
    /// Attempts to wrap the first case- and diacritic-insensitive occurrence of `substring`
    /// in square brackets.
    ///
    /// - Parameter substring: The text to search for.
    /// - Returns: `prefix[match]suffix`, or `""` when nothing matches.
    ///
    /// NOTE(review): `range` is found in a lowercased, diacritic-folded *copy* of the string,
    /// but is then used to measure distances in and to slice `self`. Indices taken from one
    /// string are not guaranteed to address the same characters in another, which is why
    /// "Curaçao" / "aca" yields "Cur[aç]ao" instead of "Cur[aça]o".
    func highlight(substring: String) -> String {
        // The search runs on a normalized copy of the receiver, not on `self`.
        if let range = lowercased()
            .folding(options: .diacriticInsensitive, locale: .current)
            .range(of: substring
                    .lowercased()
                    .folding(options: .diacriticInsensitive, locale: .current)) {
            // Distances are measured in `self` using indices that came from the normalized copy.
            let startPos = distance(from: startIndex, to: range.lowerBound)
            let endPos = distance(from: startIndex, to: range.upperBound)
            let firstSubstring = self[..<startPos]
            // Slices `self` with the copy's index range directly.
            let secondSubstring = self[range]
            let thirdSubstring = self[endPos...]
            return "\(firstSubstring)[\(secondSubstring)]\(thirdSubstring)"
        } else {
            return ""
        }
    }
}

但是,尝试时:

// Expected "Cur[aça]o", but with the extension from the question this prints "Cur[aç]ao".
print("Curaçao".highlight(substring: "aca"))

Cur[aç]ao

另一个例子:

// Expected "Cur[açaoça]o", but with the extension from the question this prints "Cur[açao]çao".
print("Curaçaoçao".highlight(substring: "acaoca"))

Cur[açao]çao

这是什么原因造成的?感谢您的帮助

您可以直接使用 localizedStandardRange(of:),它的搜索不区分变音符号和大小写。顺便说一句,不需要把范围转换为整数:

localizedStandardRange(of:)

Finds and returns the range of the first occurrence of a given string within the string by performing a case and diacritic insensitive, locale-aware search.


extension StringProtocol where Self: RangeReplaceableCollection {
    /// Brackets the first occurrence of `substring`, matched with
    /// `localizedStandardRange(of:)` (case- and diacritic-insensitive, locale-aware).
    ///
    /// - Parameter substring: The text to look for.
    /// - Returns: The text before the match, the match wrapped in `[` `]`, and the text
    ///   after it, or `nil` when `substring` does not occur.
    func highlight<S: StringProtocol>(substring: S) -> SubSequence? {
        guard let match = localizedStandardRange(of: substring) else {
            return nil
        }
        // Start from the prefix slice and grow it piece by piece.
        var highlighted = self[..<match.lowerBound]
        highlighted.append(contentsOf: "[")
        highlighted.append(contentsOf: self[match])
        highlighted.append(contentsOf: "]")
        highlighted.append(contentsOf: self[match.upperBound...])
        return highlighted
    }
}

// `highlight` returns nil when there is no match; `?? ""` prints an empty line in that case.
// Prints "Cur[aça]o".
print("Curaçao".highlight(substring: "aca") ?? "")
// Prints "Cur[açaoça]o".
print("Curaçaoçao".highlight(substring: "acaoca") ?? "")

这将打印

Cur[aça]o
Cur[açaoça]o


如果您只是想修复自己的扩展,只需把正确的选项 .caseInsensitive 和 .diacriticInsensitive 传给 range(of:options:) 方法,并且不要把范围转换为整数:

/// Wraps the first case- and diacritic-insensitive occurrence of `substring` in square brackets.
///
/// The search runs directly on `self`, so the resulting range can slice `self`
/// without converting anything to integer offsets.
///
/// - Parameter substring: The text to look for.
/// - Returns: The text before the match, the match wrapped in `[` `]`, and the text
///   after it; `""` when `substring` does not occur.
func highlight(substring: String) -> String {
    guard let match = range(of: substring, options: [.caseInsensitive, .diacriticInsensitive]) else {
        return ""
    }
    let firstSubstring = self[..<match.lowerBound]
    let secondSubstring = self[match]
    // Open-ended range: everything after the match. Subscripting with the bare index
    // would return a single Character and trap when the match ends the string.
    let thirdSubstring = self[match.upperBound...]
    return "\(firstSubstring)[\(secondSubstring)]\(thirdSubstring)"
}

您的方法失败的原因是:范围是在一个字符串(转小写并去除变音符号后的副本)上搜索得到的,而距离和切片却是在另一个字符串(原字符串)上计算的。

extension String {
    /// Returns the characters at the given integer offsets, clamped to the string's bounds.
    ///
    /// Offsets count `Character`s from the start of the string. Bounds outside
    /// `0...count` are clamped instead of trapping, so `"abc"[1..<10]` is `"bc"`,
    /// `"abc"[-1..<2]` is `"ab"` and `"abc"[5..<7]` is `""`.
    subscript(_ range: CountableRange<Int>) -> String {
        // Clamp both offsets first so the slice length is derived from the clamped start,
        // not from a possibly negative lower bound.
        let lowerOffset = max(0, range.lowerBound)
        let upperOffset = max(lowerOffset, range.upperBound)
        // `limitedBy:` stops at `endIndex` rather than walking past the end.
        let start = index(startIndex, offsetBy: lowerOffset, limitedBy: endIndex) ?? endIndex
        let end = index(start, offsetBy: upperOffset - lowerOffset, limitedBy: endIndex) ?? endIndex
        return String(self[start..<end])
    }

    /// Returns the characters from the given integer offset to the end of the string.
    ///
    /// A negative offset is treated as `0`; an offset past the end yields `""`.
    subscript(_ range: CountablePartialRangeFrom<Int>) -> String {
        let start = index(startIndex, offsetBy: max(0, range.lowerBound), limitedBy: endIndex) ?? endIndex
        return String(self[start...])
    }

    /// Wraps the first case- and diacritic-insensitive occurrence of `substring` in square brackets.
    ///
    /// The search is performed on `self` itself, so the matched range's indices are valid
    /// for slicing `self`. Searching a normalized copy and reusing its indices (or integer
    /// distances measured in it) on the original is only correct when both strings happen
    /// to have the same layout.
    ///
    /// - Parameter substring: The text to look for.
    /// - Returns: The text before the match, the match wrapped in `[` `]`, and the text
    ///   after it; `""` when `substring` does not occur.
    func highlight(substring: String) -> String {
        guard let match = range(of: substring,
                                options: [.caseInsensitive, .diacriticInsensitive],
                                locale: .current) else {
            return ""
        }
        return "\(self[..<match.lowerBound])[\(self[match])]\(self[match.upperBound...])"
    }
}

extension StringProtocol {
    /// The character offset of the first occurrence of `element`, or `nil` if it is absent.
    func distance(of element: Element) -> Int? {
        guard let position = firstIndex(of: element) else { return nil }
        return position.distance(in: self)
    }

    /// The character offset at which the first occurrence of `string` begins,
    /// or `nil` if it is absent.
    func distance<S: StringProtocol>(of string: S) -> Int? {
        guard let match = range(of: string) else { return nil }
        return match.lowerBound.distance(in: self)
    }
}

extension Collection {
    /// The number of steps from `startIndex` to `index`.
    func distance(to index: Index) -> Int {
        let origin = startIndex
        return distance(from: origin, to: index)
    }
}

extension String.Index {
    /// The offset of this index from the start of `string`.
    func distance<S: StringProtocol>(in string: S) -> Int {
        return string.distance(to: self)
    }
}

// Demo: exercise the Int-range subscript over the whole string, then the highlighter.
let string = "Curaçao"
// Slices every character offset, 0 up to `count`.
print(string[0..<string.count])
// Expected output: "Cur[aça]o".
print(string.highlight(substring: "aca"))

// Expected output: "Cur[açaoça]o".
print("Curaçaoçao".highlight(substring: "acaoca"))