0.字符串为不可变类型,内部使用指针指向UTF-8字节数组
不过要修改字符串,可以先将其转换成[]byte或者[]rune,修改后再转换回string。如下:
package main

// main demonstrates how to "modify" an immutable string: convert it to
// []byte (single-byte characters) or []rune (multi-byte characters), change
// one element, and convert the result back to a string.
//
// It prints "aBcd" and then "电话" using the builtin println.
func main() {
	s := "abcd"
	bs := []byte(s) // the conversion copies the bytes, so s itself is untouched
	bs[1] = 'B'
	println(string(bs))

	u := "电脑"
	us := []rune(u) // one element per Unicode code point rather than per byte
	us[1] = '话'
	println(string(us))
}
输出:
aBcd
电话
for遍历字符串有byte和rune两种方式,见代码
package main

import (
	"fmt"
)

// main prints the string "abc汉字" twice, once byte by byte and once rune by
// rune, to contrast the two ways of iterating over a string.
func main() {
	s := "abc汉字"

	// Indexing yields single bytes, so every byte of a multi-byte UTF-8
	// character is printed as a separate character.
	for i := 0; i < len(s); i++ { // byte
		fmt.Printf("%c,", s[i])
	}
	fmt.Println()

	// Ranging over a string yields one rune per iteration.
	for _, r := range s { // rune
		fmt.Printf("%c,", r)
	}
	fmt.Println()
}
输出
a,b,c,æ,±,,å,,,
a,b,c,汉,字,
一些常用接口
1.判断是不是以某个字符串开头
package main

import (
	"fmt"
	"strings"
)

// main checks whether "hello world" starts with "http://" and with "hello"
// and prints both results.
func main() {
	str := "hello world"
	res0 := strings.HasPrefix(str, "http://")
	res1 := strings.HasPrefix(str, "hello")
	fmt.Printf("res0 is %v\n", res0)
	fmt.Printf("res1 is %v\n", res1)
}
输出
res0 is false
res1 is true
源码片段
// HasPrefix tests whether the string s begins with prefix.
//
// An empty prefix matches every string. A prefix longer than s never matches.
func HasPrefix(s, prefix string) bool {
	// The length check must come first: it keeps the slice expression in range.
	return len(s) >= len(prefix) && s[0:len(prefix)] == prefix
}
2.判断是不是以某个字符串结尾
package main

import (
	"fmt"
	"strings"
)

// main checks whether "hello world" ends with "http://" and with "world"
// and prints both results.
func main() {
	str := "hello world"
	res0 := strings.HasSuffix(str, "http://")
	res1 := strings.HasSuffix(str, "world")
	fmt.Printf("res0 is %v\n", res0)
	fmt.Printf("res1 is %v\n", res1)
}
输出
res0 is false
res1 is true
源码片段
// HasSuffix tests whether the string s ends with suffix.
//
// An empty suffix matches every string. A suffix longer than s never matches.
func HasSuffix(s, suffix string) bool {
	// The length check must come first: it keeps the slice start non-negative.
	return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix
}
3.查找子串substr在字符串s中首次出现的位置,如果没有出现则返回-1
package main

import (
	"fmt"
	"strings"
)

// main looks up the first position of "o" and of "i" in "hello world" and
// prints both results.
func main() {
	str := "hello world"
	res0 := strings.Index(str, "o")
	res1 := strings.Index(str, "i")
	fmt.Printf("res0 is %v\n", res0)
	fmt.Printf("res1 is %v\n", res1)
}
输出
res0 is 4
res1 is -1
源码片段
// Index returns the index of the first instance of substr in s, or -1 if substr is not present in s. func Index(s, substr string) int { n := len(substr) switch { case n == 0: return 0 case n == 1: return IndexByte(s, substr[0]) case n == len(s): if substr == s { return 0 } return -1 case n > len(s): return -1 case n <= bytealg.MaxLen: // Use brute force when s and substr both are small if len(s) <= bytealg.MaxBruteForce { return bytealg.IndexString(s, substr) } c0 := substr[0] c1 := substr[1] i := 0 t := len(s) - n + 1 fails := 0 for i < t { if s[i] != c0 { // IndexByte is faster than bytealg.IndexString, so use it as long as // we're not getting lots of false positives. o := IndexByte(s[i:t], c0) if o < 0 { return -1 } i += o } if s[i+1] == c1 && s[i:i+n] == substr { return i } fails++ i++ // Switch to bytealg.IndexString when IndexByte produces too many false positives. if fails > bytealg.Cutover(i) { r := bytealg.IndexString(s[i:], substr) if r >= 0 { return r + i } return -1 } } return -1 } c0 := substr[0] c1 := substr[1] i := 0 t := len(s) - n + 1 fails := 0 for i < t { if s[i] != c0 { o := IndexByte(s[i:t], c0) if o < 0 { return -1 } i += o } if s[i+1] == c1 && s[i:i+n] == substr { return i } i++ fails++ if fails >= 4+i>>4 && i < t { // See comment in ../bytes/bytes_generic.go. j := indexRabinKarp(s[i:], substr) if j < 0 { return -1 } return i + j } } return -1 }
// IndexByte returns the index of the first instance of c in b, or -1 if c is not present in b. func IndexByte(b []byte, c byte) int { return bytealg.IndexByte(b, c) }
在bytealg包中的IndexByte函数
package bytealg

// IndexByte returns the index of the first byte in b equal to c, or -1 if no
// byte of b equals c. It scans b from the start.
func IndexByte(b []byte, c byte) int {
	for i, x := range b {
		if x == c {
			return i
		}
	}
	return -1
}

// IndexByteString returns the index of the first byte in s equal to c, or -1
// if no byte of s equals c. It is the string counterpart of IndexByte.
func IndexByteString(s string, c byte) int {
	for i := 0; i < len(s); i++ {
		if s[i] == c {
			return i
		}
	}
	return -1
}
package bytealg

// MaxBruteForce is 0 in this implementation.
// NOTE(review): strings.Index compares len(s) against this value before
// calling IndexString; with 0 that branch is only taken for an empty s.
const MaxBruteForce = 0

// Index returns the index of the first instance of b in a, or -1 if b is not present in a.
// Requires 2 <= len(b) <= MaxLen.
//
// This implementation is a stub and always panics.
func Index(a, b []byte) int {
	panic("unimplemented")
}

// IndexString returns the index of the first instance of b in a, or -1 if b is not present in a.
// Requires 2 <= len(b) <= MaxLen.
//
// This implementation is a stub and always panics.
func IndexString(a, b string) int {
	panic("unimplemented")
}

// Cutover reports the number of failures of IndexByte we should tolerate
// before switching over to Index.
// n is the number of bytes processed so far.
// See the bytes.Index implementation for details.
//
// This implementation is a stub and always panics.
func Cutover(n int) int {
	panic("unimplemented")
}
回到strings包
func indexRabinKarp(s, substr string) int { // Rabin-Karp search hashss, pow := hashStr(substr) n := len(substr) var h uint32 for i := 0; i < n; i++ { h = h*primeRK + uint32(s[i]) } if h == hashss && s[:n] == substr { return 0 } for i := n; i < len(s); { h *= primeRK h += uint32(s[i]) h -= pow * uint32(s[i-n]) i++ if h == hashss && s[i-n:i] == substr { return i - n } } return -1 }
4.查找子串substr在字符串s中最后一次出现的位置,如果没有出现则返回-1
package main

import (
	"fmt"
	"strings"
)

// main looks up the last position of "o" and of "i" in "hello world" and
// prints both results.
func main() {
	str := "hello world"
	res0 := strings.LastIndex(str, "o")
	res1 := strings.LastIndex(str, "i")
	fmt.Printf("res0 is %v\n", res0)
	fmt.Printf("res1 is %v\n", res1)
}
输出
res0 is 7
res1 is -1
源码片段
// LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s. func LastIndex(s, sep []byte) int { n := len(sep) switch { case n == 0: return len(s) case n == 1: return LastIndexByte(s, sep[0]) case n == len(s): if Equal(s, sep) { return 0 } return -1 case n > len(s): return -1 } // Rabin-Karp search from the end of the string hashss, pow := hashStrRev(sep) last := len(s) - n var h uint32 for i := len(s) - 1; i >= last; i-- { h = h*primeRK + uint32(s[i]) } if h == hashss && Equal(s[last:], sep) { return last } for i := last - 1; i >= 0; i-- { h *= primeRK h += uint32(s[i]) h -= pow * uint32(s[i+n]) if h == hashss && Equal(s[i:i+n], sep) { return i } } return -1 }
函数LastIndexByte
// LastIndexByte returns the index of the last instance of c in s, or -1 if c
// is not present in s. It scans s from the end towards the start.
func LastIndexByte(s []byte, c byte) int {
	for i := len(s) - 1; i >= 0; i-- {
		if s[i] == c {
			return i
		}
	}
	return -1
}
func hashStrRev(sep []byte) (uint32, uint32) { hash := uint32(0) for i := len(sep) - 1; i >= 0; i-- { hash = hash*primeRK + uint32(sep[i]) } var pow, sq uint32 = 1, primeRK for i := len(sep); i > 0; i >>= 1 { if i&1 != 0 { pow *= sq } sq *= sq } return hash, pow }
const (
	// primeRK is the multiplier used by the rolling hashes in this file.
	primeRK = 16777619
)
// Equal reports whether a and b
// are the same length and contain the same bytes.
// A nil argument is equivalent to an empty slice.
func Equal(a, b []byte) bool {
	// Neither cmd/compile nor gccgo allocates for these string conversions.
	return string(a) == string(b)
}
5.字符串替换
package main

import (
	"fmt"
	"strings"
)

// main replaces "world" with "golang" in "hello world world", first with a
// limit of two replacements and then with a limit of one, and prints both
// results.
func main() {
	str := "hello world world"
	// strings.Replace(original string, text to be replaced, replacement text, maximum number of replacements)
	res0 := strings.Replace(str, "world", "golang", 2)
	res1 := strings.Replace(str, "world", "golang", 1)
	fmt.Printf("res0 is %v\n", res0)
	fmt.Printf("res1 is %v\n", res1)
}
输出
res0 is hello golang golang
res1 is hello golang world
源码片段
// Replace returns a copy of the string s with the first n // non-overlapping instances of old replaced by new. // If old is empty, it matches at the beginning of the string // and after each UTF-8 sequence, yielding up to k+1 replacements // for a k-rune string. // If n < 0, there is no limit on the number of replacements. func Replace(s, old, new string, n int) string { if old == new || n == 0 { return s // avoid allocation } // Compute number of replacements. if m := Count(s, old); m == 0 { return s // avoid allocation } else if n < 0 || m < n { n = m } // Apply replacements to buffer. t := make([]byte, len(s)+n*(len(new)-len(old))) w := 0 start := 0 for i := 0; i < n; i++ { j := start if len(old) == 0 { if i > 0 { _, wid := utf8.DecodeRuneInString(s[start:]) j += wid } } else { j += Index(s[start:], old) } w += copy(t[w:], s[start:j]) w += copy(t[w:], new) start = j + len(old) } w += copy(t[w:], s[start:]) return string(t[0:w]) }
函数Count
// Count counts the number of non-overlapping instances of sep in s. // If sep is an empty slice, Count returns 1 + the number of UTF-8-encoded code points in s. func Count(s, sep []byte) int { // special case if len(sep) == 0 { return utf8.RuneCount(s) + 1 } if len(sep) == 1 { return bytealg.Count(s, sep[0]) } n := 0 for { i := Index(s, sep) if i == -1 { return n } n++ s = s[i+len(sep):] } }
utf8包中的utf8.DecodeRuneInString
func DecodeRuneInString(s string) (r rune, size int) { n := len(s) if n < 1 { return RuneError, 0 } s0 := s[0] x := first[s0] if x >= as { // The following code simulates an additional check for x == xx and // handling the ASCII and invalid cases accordingly. This mask-and-or // approach prevents an additional branch. mask := rune(x) << 31 >> 31 // Create 0x0000 or 0xFFFF. return rune(s[0])&^mask | RuneError&mask, 1 } sz := int(x & 7) accept := acceptRanges[x>>4] if n < sz { return RuneError, 1 } s1 := s[1] if s1 < accept.lo || accept.hi < s1 { return RuneError, 1 } if sz <= 2 { // <= instead of == to help the compiler eliminate some bounds checks return rune(s0&mask2)<<6 | rune(s1&maskx), 2 } s2 := s[2] if s2 < locb || hicb < s2 { return RuneError, 1 } if sz <= 3 { return rune(s0&mask3)<<12 | rune(s1&maskx)<<6 | rune(s2&maskx), 3 } s3 := s[3] if s3 < locb || hicb < s3 { return RuneError, 1 } return rune(s0&mask4)<<18 | rune(s1&maskx)<<12 | rune(s2&maskx)<<6 | rune(s3&maskx), 4 }
待续...
部分代码节选https://www.kancloud.cn/liupengjie/go/570004