package main
import (
"fmt"
"regexp"
)
// articleRegexp matches an HTML anchor whose href points at an article or
// subject page on m.xxx.cn or api.xxx.cn, up to a closing </a>.
//
// The dots in the host name are escaped so that they only match a literal
// ".". Because "." does not match a newline by default, an anchor whose
// opening tag and link text sit on different lines is not matched. The
// quantifiers are greedy, so several anchors on one line are returned as a
// single match running to the last </a> on that line.
var articleRegexp = regexp.MustCompile(`<a.*href="https://(m|api)\.xxx\.cn(.*)/(article|subjects)/\d+"(.*)>(.+)</a>`)

// contractionRegexp matches an apostrophe followed by one or two word
// characters, e.g. the "'m" in "I'm".
var contractionRegexp = regexp.MustCompile(`'\w{1,2}`)

// main demonstrates Regexp.FindAllString on an HTML fragment and on a plain
// English sentence, printing the slice of matches for each.
func main() {
	text := `<p>更多分析师观点详见
<a href="https://m.xxx.cn/article/160267" target="_blank" rel="noopener">
一周策略前瞻:周期之火破灭了?</a></p>
<p><strong>来看主题:</strong></p>
<p><strong>
1、<a class="" href="https://api.xxx.cn/web/subjects/186">PPP</a> :
</strong>
<a href="https://m.xxx.cn/article/160320" target="_blank" rel="noopener">
国办发文力促民资参与PPP,经济回落下行业有望再成>稳增长抓手 </a></p>
<p>参见上文逻辑,此处不多说了。地产板块也是类似。</p>`

	// Alternative kept for reference: a named capture group (?P<ct>...) that
	// can be referred to as ${ct} in a replacement template.
	//var SubjectRegexp = regexp.MustCompile(`<a href="https://[[:ascii:]]*">(?P<ct>.*)</a>`)
	//fmt.Println(SubjectRegexp.FindAllString(text, -1))
	//fmt.Println(SubjectRegexp.ReplaceAllString(text, `${ct}`))

	// A negative count asks for every match.
	fmt.Println(articleRegexp.FindAllString(text, -1))

	text2 := `I'm singing while you're dancing.`
	fmt.Println(contractionRegexp.FindAllString(text2, -1))
}
Output:
[<a href="https://m.xxx.cn/article/160267" target="_blank" rel="noopener">一周策略前瞻:周期之火破灭了?</a> <a class="" href="https://api.xxx.cn/web/subjects/186">PPP</a> :</strong><a href="https://m.xxx.cn/article/160320" target="_blank" rel="noopener">国办发文力促民资参与PPP,经济回落下行业有望再成稳增长抓手 </a>]
['m 're]
1. Compile VS MustCompile
// Compile turns the expression expr into a *Regexp, returning whatever error
// the internal compile helper reports.
// It delegates to compile, passing the syntax.Perl flags and false as the
// final argument.
func Compile(expr string) (*Regexp, error) {
// NOTE(review): the trailing false presumably selects leftmost-first rather
// than leftmost-longest matching — confirm against compile.
return compile(expr, syntax.Perl, false)
}
MustCompile实际上调用的是Compile,只是增加了错误检测:如果编译失败,则直接panic。
// MustCompile behaves like Compile, except that a compilation failure causes
// a panic instead of being returned. The panic message contains the quoted
// expression followed by the text of the compile error.
func MustCompile(str string) *Regexp {
	re, err := Compile(str)
	if err == nil {
		return re
	}
	panic(`regexp: Compile(` + quote(str) + `): ` + err.Error())
}
2. MatchString检测是否匹配正则,参数为被检测的字符串,返回布尔值
// MatchString reports whether the string s
// contains any match of the regular expression re.
// It forwards to doMatch, supplying s as the string input and nil for the
// two preceding arguments.
// NOTE(review): the two nil arguments are presumably the reader and
// byte-slice inputs, which are unused here — confirm against doMatch.
func (re *Regexp) MatchString(s string) bool {
return re.doMatch(nil, nil, s)
}
3. FindAllString
FindAllString有两个参数:第一个参数为要处理的字符串,第二个参数为最多返回的匹配结果数量;如果为负数,则取出所有满足条件的匹配结果。
// FindAllString returns the text of successive matches of re in s, collected
// in the order allMatches delivers them.
//
// n limits the number of matches; a negative n is replaced by len(s)+1, which
// serves as "no limit". When nothing matches, the result is nil rather than
// an empty slice.
func (re *Regexp) FindAllString(s string, n int) []string {
	limit := n
	if limit < 0 {
		limit = len(s) + 1
	}

	found := make([]string, 0, startSize)
	// loc[0] and loc[1] are the start and end offsets of the whole match.
	collect := func(loc []int) {
		found = append(found, s[loc[0]:loc[1]])
	}
	re.allMatches(s, nil, limit, collect)

	if len(found) > 0 {
		return found
	}
	return nil
}
其中核心是调用私有方法 allMatches 来获取结果。该方法的第一个参数为要处理的文本字符串;第二个参数为字节切片,在FindAllString中传入的是nil;第三个参数为FindAllString的第二个参数n;第四个参数为一个函数,它负责把所有的匹配结果收集起来。
// Find matches in slice b if b is non-nil, otherwise find matches in string s.
//
// allMatches calls deliver once per accepted match, passing the match index
// slice after it has gone through re.pad. It stops after n accepted matches,
// when doExecute reports no match, or once the search position has moved
// past the end of the input.
func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
// end is the length of whichever input is being searched.
var end int
if b == nil {
end = len(s)
} else {
end = len(b)
}
// pos is where the next search starts, i counts the matches delivered so
// far, and prevMatchEnd is the end offset of the previous match (-1 before
// the first one).
for pos, i, prevMatchEnd := 0, 0, -1; i < n && pos <= end; {
matches := re.doExecute(nil, b, s, pos, re.prog.NumCap, nil)
if len(matches) == 0 {
break
}
accept := true
// A match that ends exactly where the search started has zero width.
if matches[1] == pos {
// We've found an empty match.
if matches[0] == prevMatchEnd {
// We don't allow an empty match right
// after a previous match, so ignore it.
accept = false
}
// Step over one rune so the next search does not find the same
// empty match again.
var width int
// TODO: use step()
if b == nil {
_, width = utf8.DecodeRuneInString(s[pos:end])
} else {
_, width = utf8.DecodeRune(b[pos:end])
}
if width > 0 {
pos += width
} else {
// Nothing left to decode: move past end so the loop condition fails.
pos = end + 1
}
} else {
// The match ends beyond pos: resume searching at its end.
pos = matches[1]
}
prevMatchEnd = matches[1]
if accept {
deliver(re.pad(matches))
i++
}
}
}
// "a." matches the letter a followed by any single character
// (by default "." does not match a newline).
re := regexp.MustCompile("a.")
// n = -1: return every match.
fmt.Println(re.FindAllString("paranormal", -1))
// n = 2: return at most two matches.
fmt.Println(re.FindAllString("paranormal", 2))
// Matches are successive and do not overlap: after "aa" the search resumes
// at the end of that match, so the second a is not reused.
fmt.Println(re.FindAllString("graal", -1))
// No match: FindAllString returns nil, which fmt.Println prints as [].
fmt.Println(re.FindAllString("none", -1))
Output:
[ar an al]
[ar an]
[aa]
[]
4. ReplaceAllString
替换所有匹配到的结果为指定的字符串。第二个参数给出了要替换的值
// ReplaceAllString returns a copy of src in which the matches of re are
// replaced by the expansion of the template repl.
//
// Only the two indices of the whole match are requested from replaceAll
// unless repl contains a "$", in which case the indices of every
// subexpression are requested as well so the template can refer to them.
func (re *Regexp) ReplaceAllString(src, repl string) string {
	nmatch := 2
	if strings.Contains(repl, "$") {
		nmatch = 2 * (re.numSubexp + 1)
	}

	// expandInto appends the expanded template for one match to dst.
	expandInto := func(dst []byte, match []int) []byte {
		return re.expand(dst, repl, nil, src, match)
	}
	return string(re.replaceAll(nil, src, nmatch, expandInto))
}
replaceAll
// replaceAll builds and returns a new byte slice from the input (bsrc if it
// is non-nil, otherwise src): text between matches is copied unchanged, and
// for each match repl is called to append the replacement.
//
// nmatch is the number of match indices requested from doExecute; it is
// capped at re.prog.NumCap. repl receives the output buffer built so far and
// the match indices, and returns the extended buffer. An empty match that
// immediately follows another match is not replaced.
func (re *Regexp) replaceAll(bsrc []byte, src string, nmatch int, repl func(dst []byte, m []int) []byte) []byte {
lastMatchEnd := 0 // end position of the most recent match
searchPos := 0 // position where we next look for a match
var buf []byte
// endPos is the length of whichever input is being processed.
var endPos int
if bsrc != nil {
endPos = len(bsrc)
} else {
endPos = len(src)
}
if nmatch > re.prog.NumCap {
nmatch = re.prog.NumCap
}
// dstCap is a two-int scratch array handed to doExecute as an empty slice.
// NOTE(review): presumably this lets doExecute return the whole-match
// indices without allocating — confirm against doExecute.
var dstCap [2]int
for searchPos <= endPos {
a := re.doExecute(nil, bsrc, src, searchPos, nmatch, dstCap[:0])
if len(a) == 0 {
break // no more matches
}
// Copy the unmatched characters before this match.
if bsrc != nil {
buf = append(buf, bsrc[lastMatchEnd:a[0]]...)
} else {
buf = append(buf, src[lastMatchEnd:a[0]]...)
}
// Now insert a copy of the replacement string, but not for a
// match of the empty string immediately after another match.
// (Otherwise, we get double replacement for patterns that
// match both empty and nonempty strings.)
if a[1] > lastMatchEnd || a[0] == 0 {
buf = repl(buf, a)
}
lastMatchEnd = a[1]
// Advance past this match; always advance at least one character.
var width int
if bsrc != nil {
_, width = utf8.DecodeRune(bsrc[searchPos:])
} else {
_, width = utf8.DecodeRuneInString(src[searchPos:])
}
if searchPos+width > a[1] {
searchPos += width
} else if searchPos+1 > a[1] {
// This clause is only needed at the end of the input
// string. In that case, DecodeRuneInString returns width=0.
searchPos++
} else {
searchPos = a[1]
}
}
// Copy the unmatched characters after the last match.
if bsrc != nil {
buf = append(buf, bsrc[lastMatchEnd:]...)
} else {
buf = append(buf, src[lastMatchEnd:]...)
}
return buf
}
5. ReplaceAllStringFunc
// ConvertTabToEmptyString rewrites every substring of text matched by
// TabRegExp, replacing each tab character inside the match with one space.
// Text outside the matches is returned unchanged.
//
// The tab is spelled as the escape "\t" rather than as a literal tab
// character: a literal tab is easily normalised to an ordinary space by
// editors or formatters, which turns the replacement into a no-op.
// NOTE(review): the replacement is assumed to be a single space; the function
// name suggests an empty string may have been intended — confirm with callers.
func ConvertTabToEmptyString(text string) string {
	return TabRegExp.ReplaceAllStringFunc(text, func(matchedStr string) string {
		return strings.Replace(matchedStr, "\t", " ", -1)
	})
}
6.FindAllStringSubmatch
找出所有匹配及其子组(捕获组)的字符串列表;第二个参数小于0时,表示返回全部匹配。
// FindAllStringSubmatch returns, for each successive match of re in s, a
// slice holding the text of the whole match followed by the text of each
// subexpression. A subexpression whose start index is negative is left as
// the empty string.
//
// n limits the number of matches; a negative n is replaced by len(s)+1, which
// serves as "no limit". When nothing matches, the result is nil.
func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string {
	limit := n
	if limit < 0 {
		limit = len(s) + 1
	}

	var all [][]string
	re.allMatches(s, nil, limit, func(loc []int) {
		// Allocate lazily so that "no match" keeps the nil result.
		if all == nil {
			all = make([][]string, 0, startSize)
		}
		// loc holds one (start, end) index pair per group.
		groups := make([]string, len(loc)/2)
		for g := range groups {
			start, stop := loc[2*g], loc[2*g+1]
			if start < 0 {
				continue
			}
			groups[g] = s[start:stop]
		}
		all = append(all, groups)
	})
	return all
}
示例:
// AlliRemLinkUrls edits the Content of every article in articleArr in place.
// For each match of LinkUrlRegExp found in the content as it was before
// editing, the first occurrence of the full match text is replaced with the
// text of the match's third capture group.
// NOTE(review): group 3 is presumably the link's visible text — confirm
// against the definition of LinkUrlRegExp.
func AlliRemLinkUrls(articleArr []*Article) {
	for _, article := range articleArr {
		// The range expression is evaluated once, so all matches come from
		// the content before any replacement; ranging over a nil result is a no-op.
		for _, groups := range LinkUrlRegExp.FindAllStringSubmatch(article.Content, -1) {
			article.Content = strings.Replace(article.Content, groups[0], groups[3], 1)
		}
	}
}