Swift:获取字符串的子串,忽略大小写和重音
Swift: get substring of string, ignoring case and accents
我想从字符串中提取一个子字符串,忽略重音和大小写。
例如,如果字符串是 Curaçao
并且输入子字符串 aca
,它将把字符串拆分为三个子字符串:Cur
(匹配前的子字符串),aça
(匹配子串)和o
(匹配后的剩余子串)。
我试过这段代码:
// Int-offset subscripts plus a `highlight` helper, exactly as posted in the question.
extension String {
// Character at `offset` characters from the start.
subscript(offset: Int) -> Character { self[index(startIndex, offsetBy: offset)] }
// Slice covering a half-open range of character offsets.
subscript(range: Range<Int>) -> SubSequence {
let startIndex = index(self.startIndex, offsetBy: range.lowerBound)
return self[startIndex..<index(startIndex, offsetBy: range.count)]
}
// Slice covering a closed range of character offsets; `range.count` already
// includes the upper bound, so the half-open slice below ends just past it.
subscript(range: ClosedRange<Int>) -> SubSequence {
let startIndex = index(self.startIndex, offsetBy: range.lowerBound)
return self[startIndex..<index(startIndex, offsetBy: range.count)]
}
// Slice from a character offset to the end.
subscript(range: PartialRangeFrom<Int>) -> SubSequence { self[index(startIndex, offsetBy: range.lowerBound)...] }
// Slice from the start through a character offset (inclusive).
subscript(range: PartialRangeThrough<Int>) -> SubSequence { self[...index(startIndex, offsetBy: range.upperBound)] }
// Slice from the start up to a character offset (exclusive).
subscript(range: PartialRangeUpTo<Int>) -> SubSequence { self[..<index(startIndex, offsetBy: range.upperBound)] }
// Intended to wrap the first case- and diacritic-insensitive match of
// `substring` in square brackets; returns "" when nothing matches.
func highlight(substring: String) -> String {
// The search runs on a lowercased, diacritic-folded copy of `self`, so `range`
// holds indices that belong to that copy, not to `self`.
if let range = lowercased()
.folding(options: .diacriticInsensitive, locale: .current)
.range(of: substring
.lowercased()
.folding(options: .diacriticInsensitive, locale: .current)) {
// NOTE(review): the copy's indices are measured against and applied to `self`
// below. Searching one string and computing distances/slices on another is
// the reason the bracketed part comes out wrong for accented input.
let startPos = distance(from: startIndex, to: range.lowerBound)
let endPos = distance(from: startIndex, to: range.upperBound)
let firstSubstring = self[..<startPos]
let secondSubstring = self[range]
let thirdSubstring = self[endPos...]
return "\(firstSubstring)[\(secondSubstring)]\(thirdSubstring)"
} else {
return ""
}
}
}
但是,尝试时:
// The intended result is "Cur[aça]o"; the output actually produced follows.
print("Curaçao".highlight(substring: "aca"))
Cur[aç]ao
另一个例子:
// Second example with a longer string and search term; the output actually produced follows.
print("Curaçaoçao".highlight(substring: "acaoca"))
Cur[açao]çao
这是什么原因造成的?感谢您的帮助
您可以直接使用 localizedStandardRange(of:),它在搜索时既不区分变音符号,也不区分大小写。顺便说一句,不需要将范围转换为整数:
localizedStandardRange(of:)
Finds and returns the range of the first occurrence of a given string within the string by performing a case and diacritic insensitive, locale-aware search.
extension StringProtocol where Self: RangeReplaceableCollection {
    /// Wraps the first locale-aware, case- and diacritic-insensitive occurrence
    /// of `substring` in square brackets.
    ///
    /// - Parameter substring: The text to look for.
    /// - Returns: The receiver's contents with the match bracketed, or `nil`
    ///   when `localizedStandardRange(of:)` finds no match.
    func highlight<S: StringProtocol>(substring: S) -> SubSequence? {
        guard let match = localizedStandardRange(of: substring) else {
            return nil
        }
        // Build the result piece by piece: prefix, "[", match, "]", suffix.
        var highlighted = self[..<match.lowerBound]
        highlighted.append("[")
        highlighted.append(contentsOf: self[match])
        highlighted.append("]")
        highlighted.append(contentsOf: self[match.upperBound...])
        return highlighted
    }
}
// Print each highlighted result, falling back to an empty string when `highlight` returns nil.
print("Curaçao".highlight(substring: "aca") ?? "")
print("Curaçaoçao".highlight(substring: "acaoca") ?? "")
这将打印
Cur[aça]o
Cur[açaoça]o
如果您只是想修复自己的扩展(extension),只需将正确的选项 caseInsensitive
和 diacriticInsensitive
传递给 range(of:options:) 方法,并且不要将范围转换为整数:
/// Wraps the first case- and diacritic-insensitive occurrence of `substring`
/// in square brackets.
///
/// - Parameter substring: The text to look for.
/// - Returns: The receiver with the matched portion bracketed, or `""` when
///   there is no match.
func highlight(substring: String) -> String {
    // Search `self` directly so the resulting indices are valid for slicing `self`.
    guard let match = range(of: substring, options: [.caseInsensitive, .diacriticInsensitive]) else {
        return ""
    }
    let firstSubstring = self[..<match.lowerBound]
    let secondSubstring = self[match]
    // Take everything after the match. Subscripting with the bare index would
    // yield a single character and trap when the match ends the string.
    let thirdSubstring = self[match.upperBound...]
    return "\(firstSubstring)[\(secondSubstring)]\(thirdSubstring)"
}
您的方法失败的原因是:您在一个字符串(经过小写和去除变音符号处理的副本)上搜索范围,却在另一个字符串(原字符串)上使用该范围并计算距离。
extension String {
    /// Returns the characters at the given half-open range of character
    /// offsets, with both bounds clamped to `0...count`.
    ///
    /// - Parameter range: Character offsets measured from the start of the string.
    /// - Returns: A new string with the selected characters; empty when the
    ///   clamped range is empty.
    subscript(_ range: CountableRange<Int>) -> String {
        // Clamp both bounds first so the length is derived from the same lower
        // bound that the start index uses.
        let lower = Swift.min(Swift.max(0, range.lowerBound), count)
        let upper = Swift.min(Swift.max(lower, range.upperBound), count)
        let start = index(startIndex, offsetBy: lower)
        let end = index(start, offsetBy: upper - lower)
        return String(self[start..<end])
    }

    /// Returns the characters from the given character offset to the end,
    /// with the offset clamped to `0...count`.
    ///
    /// - Parameter range: A one-sided range of character offsets.
    /// - Returns: A new string with the trailing characters.
    subscript(_ range: CountablePartialRangeFrom<Int>) -> String {
        let lower = Swift.min(Swift.max(0, range.lowerBound), count)
        return String(self[index(startIndex, offsetBy: lower)...])
    }

    /// Wraps the first case- and diacritic-insensitive occurrence of
    /// `substring` in square brackets.
    ///
    /// - Parameter substring: The text to look for.
    /// - Returns: The receiver with the matched portion bracketed, or `""`
    ///   when there is no match.
    func highlight(substring: String) -> String {
        // Search `self` itself so the returned indices belong to the string
        // being sliced; no folded copy and no Int conversion is involved.
        guard let match = range(of: substring, options: [.caseInsensitive, .diacriticInsensitive]) else {
            return ""
        }
        return "\(self[..<match.lowerBound])[\(self[match])]\(self[match.upperBound...])"
    }
}
extension StringProtocol {
    /// Character offset of the first occurrence of `element`, or `nil` when
    /// `firstIndex(of:)` finds none.
    func distance(of element: Element) -> Int? {
        guard let position = firstIndex(of: element) else { return nil }
        return position.distance(in: self)
    }

    /// Character offset of the start of the first occurrence of `string`,
    /// or `nil` when `range(of:)` finds none.
    func distance<S: StringProtocol>(of string: S) -> Int? {
        guard let match = range(of: string) else { return nil }
        return match.lowerBound.distance(in: self)
    }
}
extension Collection {
    /// Number of steps from the collection's `startIndex` to `index`.
    func distance(to index: Index) -> Int {
        let origin = startIndex
        return distance(from: origin, to: index)
    }
}
extension String.Index {
    /// Offset of this index from the start of `text`, obtained by forwarding
    /// to `text.distance(to:)`.
    func distance<Text: StringProtocol>(in text: Text) -> Int {
        return text.distance(to: self)
    }
}
// Sample input containing a "ç".
let string = "Curaçao"
// Slice from offset 0 up to the character count using the Int-range subscript.
print(string[0..<string.count])
// Highlight a match for a search term typed without the cedilla.
print(string.highlight(substring: "aca"))
// Same call on a longer string and search term.
print("Curaçaoçao".highlight(substring: "acaoca"))
我想从字符串中提取一个子字符串,忽略重音和大小写。
例如,如果字符串是 Curaçao
并且输入子字符串 aca
,它将把字符串拆分为三个子字符串:Cur
(匹配前的子字符串),aça
(匹配子串)和o
(匹配后的剩余子串)。
我试过这段代码:
// Int-offset subscripts plus a `highlight` helper, exactly as posted in the question.
extension String {
// Character at `offset` characters from the start.
subscript(offset: Int) -> Character { self[index(startIndex, offsetBy: offset)] }
// Slice covering a half-open range of character offsets.
subscript(range: Range<Int>) -> SubSequence {
let startIndex = index(self.startIndex, offsetBy: range.lowerBound)
return self[startIndex..<index(startIndex, offsetBy: range.count)]
}
// Slice covering a closed range of character offsets; `range.count` already
// includes the upper bound, so the half-open slice below ends just past it.
subscript(range: ClosedRange<Int>) -> SubSequence {
let startIndex = index(self.startIndex, offsetBy: range.lowerBound)
return self[startIndex..<index(startIndex, offsetBy: range.count)]
}
// Slice from a character offset to the end.
subscript(range: PartialRangeFrom<Int>) -> SubSequence { self[index(startIndex, offsetBy: range.lowerBound)...] }
// Slice from the start through a character offset (inclusive).
subscript(range: PartialRangeThrough<Int>) -> SubSequence { self[...index(startIndex, offsetBy: range.upperBound)] }
// Slice from the start up to a character offset (exclusive).
subscript(range: PartialRangeUpTo<Int>) -> SubSequence { self[..<index(startIndex, offsetBy: range.upperBound)] }
// Intended to wrap the first case- and diacritic-insensitive match of
// `substring` in square brackets; returns "" when nothing matches.
func highlight(substring: String) -> String {
// The search runs on a lowercased, diacritic-folded copy of `self`, so `range`
// holds indices that belong to that copy, not to `self`.
if let range = lowercased()
.folding(options: .diacriticInsensitive, locale: .current)
.range(of: substring
.lowercased()
.folding(options: .diacriticInsensitive, locale: .current)) {
// NOTE(review): the copy's indices are measured against and applied to `self`
// below. Searching one string and computing distances/slices on another is
// the reason the bracketed part comes out wrong for accented input.
let startPos = distance(from: startIndex, to: range.lowerBound)
let endPos = distance(from: startIndex, to: range.upperBound)
let firstSubstring = self[..<startPos]
let secondSubstring = self[range]
let thirdSubstring = self[endPos...]
return "\(firstSubstring)[\(secondSubstring)]\(thirdSubstring)"
} else {
return ""
}
}
}
但是,尝试时:
// The intended result is "Cur[aça]o"; the output actually produced follows.
print("Curaçao".highlight(substring: "aca"))
Cur[aç]ao
另一个例子:
// Second example with a longer string and search term; the output actually produced follows.
print("Curaçaoçao".highlight(substring: "acaoca"))
Cur[açao]çao
这是什么原因造成的?感谢您的帮助
您可以直接使用 localizedStandardRange(of:),它在搜索时既不区分变音符号,也不区分大小写。顺便说一句,不需要将范围转换为整数:
localizedStandardRange(of:)
Finds and returns the range of the first occurrence of a given string within the string by performing a case and diacritic insensitive, locale-aware search.
extension StringProtocol where Self: RangeReplaceableCollection {
    /// Wraps the first locale-aware, case- and diacritic-insensitive occurrence
    /// of `substring` in square brackets.
    ///
    /// - Parameter substring: The text to look for.
    /// - Returns: The receiver's contents with the match bracketed, or `nil`
    ///   when `localizedStandardRange(of:)` finds no match.
    func highlight<S: StringProtocol>(substring: S) -> SubSequence? {
        guard let match = localizedStandardRange(of: substring) else {
            return nil
        }
        // Build the result piece by piece: prefix, "[", match, "]", suffix.
        var highlighted = self[..<match.lowerBound]
        highlighted.append("[")
        highlighted.append(contentsOf: self[match])
        highlighted.append("]")
        highlighted.append(contentsOf: self[match.upperBound...])
        return highlighted
    }
}
// Print each highlighted result, falling back to an empty string when `highlight` returns nil.
print("Curaçao".highlight(substring: "aca") ?? "")
print("Curaçaoçao".highlight(substring: "acaoca") ?? "")
这将打印
Cur[aça]o
Cur[açaoça]o
如果您只是想修复自己的扩展(extension),只需将正确的选项 caseInsensitive
和 diacriticInsensitive
传递给 range(of:options:) 方法,并且不要将范围转换为整数:
/// Wraps the first case- and diacritic-insensitive occurrence of `substring`
/// in square brackets.
///
/// - Parameter substring: The text to look for.
/// - Returns: The receiver with the matched portion bracketed, or `""` when
///   there is no match.
func highlight(substring: String) -> String {
    // Search `self` directly so the resulting indices are valid for slicing `self`.
    guard let match = range(of: substring, options: [.caseInsensitive, .diacriticInsensitive]) else {
        return ""
    }
    let firstSubstring = self[..<match.lowerBound]
    let secondSubstring = self[match]
    // Take everything after the match. Subscripting with the bare index would
    // yield a single character and trap when the match ends the string.
    let thirdSubstring = self[match.upperBound...]
    return "\(firstSubstring)[\(secondSubstring)]\(thirdSubstring)"
}
您的方法失败的原因是:您在一个字符串(经过小写和去除变音符号处理的副本)上搜索范围,却在另一个字符串(原字符串)上使用该范围并计算距离。
extension String {
    /// Returns the characters at the given half-open range of character
    /// offsets, with both bounds clamped to `0...count`.
    ///
    /// - Parameter range: Character offsets measured from the start of the string.
    /// - Returns: A new string with the selected characters; empty when the
    ///   clamped range is empty.
    subscript(_ range: CountableRange<Int>) -> String {
        // Clamp both bounds first so the length is derived from the same lower
        // bound that the start index uses.
        let lower = Swift.min(Swift.max(0, range.lowerBound), count)
        let upper = Swift.min(Swift.max(lower, range.upperBound), count)
        let start = index(startIndex, offsetBy: lower)
        let end = index(start, offsetBy: upper - lower)
        return String(self[start..<end])
    }

    /// Returns the characters from the given character offset to the end,
    /// with the offset clamped to `0...count`.
    ///
    /// - Parameter range: A one-sided range of character offsets.
    /// - Returns: A new string with the trailing characters.
    subscript(_ range: CountablePartialRangeFrom<Int>) -> String {
        let lower = Swift.min(Swift.max(0, range.lowerBound), count)
        return String(self[index(startIndex, offsetBy: lower)...])
    }

    /// Wraps the first case- and diacritic-insensitive occurrence of
    /// `substring` in square brackets.
    ///
    /// - Parameter substring: The text to look for.
    /// - Returns: The receiver with the matched portion bracketed, or `""`
    ///   when there is no match.
    func highlight(substring: String) -> String {
        // Search `self` itself so the returned indices belong to the string
        // being sliced; no folded copy and no Int conversion is involved.
        guard let match = range(of: substring, options: [.caseInsensitive, .diacriticInsensitive]) else {
            return ""
        }
        return "\(self[..<match.lowerBound])[\(self[match])]\(self[match.upperBound...])"
    }
}
extension StringProtocol {
    /// Character offset of the first occurrence of `element`, or `nil` when
    /// `firstIndex(of:)` finds none.
    func distance(of element: Element) -> Int? {
        guard let position = firstIndex(of: element) else { return nil }
        return position.distance(in: self)
    }

    /// Character offset of the start of the first occurrence of `string`,
    /// or `nil` when `range(of:)` finds none.
    func distance<S: StringProtocol>(of string: S) -> Int? {
        guard let match = range(of: string) else { return nil }
        return match.lowerBound.distance(in: self)
    }
}
extension Collection {
    /// Number of steps from the collection's `startIndex` to `index`.
    func distance(to index: Index) -> Int {
        let origin = startIndex
        return distance(from: origin, to: index)
    }
}
extension String.Index {
    /// Offset of this index from the start of `text`, obtained by forwarding
    /// to `text.distance(to:)`.
    func distance<Text: StringProtocol>(in text: Text) -> Int {
        return text.distance(to: self)
    }
}
// Sample input containing a "ç".
let string = "Curaçao"
// Slice from offset 0 up to the character count using the Int-range subscript.
print(string[0..<string.count])
// Highlight a match for a search term typed without the cedilla.
print(string.highlight(substring: "aca"))
// Same call on a longer string and search term.
print("Curaçaoçao".highlight(substring: "acaoca"))