如何使用自定义的单词边界来选取每个单词的首字符?

How to select first chars with a custom word boundary?

我有如下一系列由单词组成的测试用例:

    {
        input:    "Halley's Comet",
        expected: "HC",
    },
    {
        input:    "First In, First Out",
        expected: "FIFO",
    },
    {
        input:    "The Road _Not_ Taken",
        expected: "TRNT",
    },

我想用一个正则表达式匹配这些单词的所有首字母,同时避免把字符 "_" 当作首字母匹配,并且把单引号算作单词的一部分。
目前,我有一个在 PCRE 语法下可以正常工作的正则表达式,但它无法在 Go 的 regexp 包中使用:(?<![a-zA-Z0-9'])([a-zA-Z0-9'])
我知道 Go 不支持环视(lookaround)断言,所以我正在寻找一种合适的替代方法。

我还使用下面这个函数来获取所有匹配字符串组成的切片:re.FindAllString(s, -1)

感谢您的帮助。

利用字符类和单词边界应该就足够了:

\b_*([a-z])[a-z]*(?:'s)?_*\b\W*

demo

用法:

package main

import (
    "fmt"
    "regexp"
)

// main demonstrates extracting the initials of a phrase with Go's regexp
// package without using lookbehind. Each match consumes one whole word
// (optional surrounding underscores, the letters, an optional possessive 's
// and any trailing non-word characters) while capturing only its first letter.
func main() {
    re := regexp.MustCompile(`(?i)\b_*([a-z])[a-z]*(?:'s)?_*\b\W*`)
    // Substitute every matched word with its captured first letter ($1).
    // Replacing with "" would delete each match and print an empty line.
    fmt.Println(re.ReplaceAllString("O'Brian's dog", "$1"))
}

顺便记录一下(for the record),下面是一个不使用正则表达式的解决方案

package main

import (
    "fmt"
)

// main runs firstLet and words over a few sample phrases and prints both
// result sets in Go syntax: the extracted words first, then the initials.
func main() {
    inputs := []string{"Hallمرحباey's Comet", "First In, First Out", "The Road _Not_ Taken", "O'Brian's Dog"}
    wordSets := make([][]string, 0, len(inputs))
    initials := make([][]string, 0, len(inputs))
    for i := range inputs {
        initials = append(initials, firstLet(inputs[i]))
        wordSets = append(wordSets, words(inputs[i]))
    }
    fmt.Printf("%#v\n", wordSets)
    fmt.Printf("%#v\n", initials)
}

// firstLet returns the first letter of every word in in, one single-rune
// string per word. A word starts at the first rune accepted by isChar and
// lasts until the next space; runes seen before a word starts (such as a
// leading '_') are skipped and never reported as a first letter.
func firstLet(in string) (out []string) {
    inside := false // true while scanning the remainder of a word
    for _, ch := range in {
        switch {
        case inside:
            // Only a space closes the current word.
            inside = ch != ' '
        case isChar(ch):
            inside = true
            out = append(out, string(ch))
        }
    }
    return out
}

// words splits in into space-separated words. A word starts at the first
// rune accepted by isChar; from then on every rune except '_' is kept
// (punctuation and non-ASCII runes included) until a space ends the word.
// Runes that appear before a word has started are dropped.
func words(in string) (out []string) {
    var (
        cur    []rune // runes of the word currently being collected
        inside bool   // true once the current word's first letter was seen
    )
    // flush emits the collected word, if any, and empties the buffer so its
    // backing storage can be reused for the next word.
    flush := func() {
        if len(cur) == 0 {
            return
        }
        out = append(out, string(cur))
        cur = cur[:0]
    }
    for _, ch := range in {
        switch {
        case !inside:
            if isChar(ch) {
                cur = append(cur, ch)
                inside = true
            }
        case ch == ' ':
            flush()
            inside = false
        case ch != '_':
            cur = append(cur, ch)
        }
    }
    // The input may end in the middle of a word.
    flush()
    return out
}

// isChar reports whether r is an ASCII letter, i.e. lies in 'a'..'z' or
// 'A'..'Z'. Digits, punctuation and non-ASCII letters all yield false.
func isChar(r rune) bool {
    switch {
    case 'a' <= r && r <= 'z':
        return true
    case 'A' <= r && r <= 'Z':
        return true
    default:
        return false
    }
}

输出

[][]string{[]string{"Hallمرحباey's", "Comet"}, []string{"First", "In,", "First", "Out"}, []string{"The", "Road", "Not", "Taken"}, []string{"O'Brian's", "Dog"}}
[][]string{[]string{"H", "C"}, []string{"F", "I", "F", "O"}, []string{"T", "R", "N", "T"}, []string{"O", "D"}}