0.字符串为不可变类型,内部使用指针指向UTF-8字节数组
不过要修改字符串可以先将其转换成[]byte或者[]rune。如下
package main
func main() {
s := "abcd"
bs := []byte(s)
bs[1] = 'B'
println(string(bs))
u := "电脑"
us := []rune(u)
us[1] = '话'
println(string(us))
}
输出:
aBcd
电话
for遍历字符串有byte和rune两种方式,见代码
package main
import (
"fmt"
)
func main() {
s := "abc汉字"
for i := 0; i < len(s); i++ { // byte
fmt.Printf("%c,", s[i])
}
fmt.Println()
for _, r := range s { // rune
fmt.Printf("%c,", r)
}
fmt.Println()
}
输出
a,b,c,æ,±,,å,,,
a,b,c,汉,字,
一些常用接口
1.判断是不是以某个字符串开头
package main
import (
"fmt"
"strings"
)
func main() {
str := "hello world"
res0 := strings.HasPrefix(str, "http://")
res1 := strings.HasPrefix(str, "hello")
fmt.Printf("res0 is %v\n", res0)
fmt.Printf("res1 is %v\n", res1)
}
输出
res0 is false
res1 is true
源码片段
// HasPrefix tests whether the string s begins with prefix.
func HasPrefix(s, prefix string) bool {
return len(s) >= len(prefix) && s[0:len(prefix)] == prefix
}
2.判断是不是以某个字符串结尾
package main
import (
"fmt"
"strings"
)
func main() {
str := "hello world"
res0 := strings.HasSuffix(str, "http://")
res1 := strings.HasSuffix(str, "world")
fmt.Printf("res0 is %v\n", res0)
fmt.Printf("res1 is %v\n", res1)
}
输出
res0 is false res1 is true
源码片段
// HasSuffix tests whether the string s ends with suffix.
func HasSuffix(s, suffix string) bool {
return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix
}
3. 判断str在s中首次出现的位置,如果没有返回-1
package main
import (
"fmt"
"strings"
)
func main() {
str := "hello world"
res0 := strings.Index(str, "o")
res1 := strings.Index(str, "i")
fmt.Printf("res0 is %v\n", res0)
fmt.Printf("res1 is %v\n", res1)
}
输出
res0 is 4
res1 is -1
源码片段
// Index returns the index of the first instance of substr in s, or -1 if substr is not present in s.
func Index(s, substr string) int {
n := len(substr)
switch {
case n == 0:
return 0
case n == 1:
return IndexByte(s, substr[0])
case n == len(s):
if substr == s {
return 0
}
return -1
case n > len(s):
return -1
case n <= bytealg.MaxLen:
// Use brute force when s and substr both are small
if len(s) <= bytealg.MaxBruteForce {
return bytealg.IndexString(s, substr)
}
c0 := substr[0]
c1 := substr[1]
i := 0
t := len(s) - n + 1
fails := 0
for i < t {
if s[i] != c0 {
// IndexByte is faster than bytealg.IndexString, so use it as long as
// we're not getting lots of false positives.
o := IndexByte(s[i:t], c0)
if o < 0 {
return -1
}
i += o
}
if s[i+1] == c1 && s[i:i+n] == substr {
return i
}
fails++
i++
// Switch to bytealg.IndexString when IndexByte produces too many false positives.
if fails > bytealg.Cutover(i) {
r := bytealg.IndexString(s[i:], substr)
if r >= 0 {
return r + i
}
return -1
}
}
return -1
}
c0 := substr[0]
c1 := substr[1]
i := 0
t := len(s) - n + 1
fails := 0
for i < t {
if s[i] != c0 {
o := IndexByte(s[i:t], c0)
if o < 0 {
return -1
}
i += o
}
if s[i+1] == c1 && s[i:i+n] == substr {
return i
}
i++
fails++
if fails >= 4+i>>4 && i < t {
// See comment in ../bytes/bytes_generic.go.
j := indexRabinKarp(s[i:], substr)
if j < 0 {
return -1
}
return i + j
}
}
return -1
}
// IndexByte returns the index of the first instance of c in b, or -1 if c is not present in b.
func IndexByte(b []byte, c byte) int {
return bytealg.IndexByte(b, c)
}
在byteakg包中的IndexByte函数
package bytealg
func IndexByte(b []byte, c byte) int {
for i, x := range b {
if x == c {
return i
}
}
return -1
}
func IndexByteString(s string, c byte) int {
for i := 0; i < len(s); i++ {
if s[i] == c {
return i
}
}
return -1
}
package bytealg
const MaxBruteForce = 0
// Index returns the index of the first instance of b in a, or -1 if b is not present in a.
// Requires 2 <= len(b) <= MaxLen.
func Index(a, b []byte) int {
panic("unimplemented")
}
// IndexString returns the index of the first instance of b in a, or -1 if b is not present in a.
// Requires 2 <= len(b) <= MaxLen.
func IndexString(a, b string) int {
panic("unimplemented")
}
// Cutover reports the number of failures of IndexByte we should tolerate
// before switching over to Index.
// n is the number of bytes processed so far.
// See the bytes.Index implementation for details.
func Cutover(n int) int {
panic("unimplemented")
}
回到strings包
func indexRabinKarp(s, substr string) int {
// Rabin-Karp search
hashss, pow := hashStr(substr)
n := len(substr)
var h uint32
for i := 0; i < n; i++ {
h = h*primeRK + uint32(s[i])
}
if h == hashss && s[:n] == substr {
return 0
}
for i := n; i < len(s); {
h *= primeRK
h += uint32(s[i])
h -= pow * uint32(s[i-n])
i++
if h == hashss && s[i-n:i] == substr {
return i - n
}
}
return -1
}
4.判断str在s中最后一次出现的位置,如果没有返回-1
package main
import (
"fmt"
"strings"
)
func main() {
str := "hello world"
res0 := strings.LastIndex(str, "o")
res1 := strings.LastIndex(str, "i")
fmt.Printf("res0 is %v\n", res0)
fmt.Printf("res1 is %v\n", res1)
}
输出
res0 is 7
res1 is -1
源码片段
// LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s.
func LastIndex(s, sep []byte) int {
n := len(sep)
switch {
case n == 0:
return len(s)
case n == 1:
return LastIndexByte(s, sep[0])
case n == len(s):
if Equal(s, sep) {
return 0
}
return -1
case n > len(s):
return -1
}
// Rabin-Karp search from the end of the string
hashss, pow := hashStrRev(sep)
last := len(s) - n
var h uint32
for i := len(s) - 1; i >= last; i-- {
h = h*primeRK + uint32(s[i])
}
if h == hashss && Equal(s[last:], sep) {
return last
}
for i := last - 1; i >= 0; i-- {
h *= primeRK
h += uint32(s[i])
h -= pow * uint32(s[i+n])
if h == hashss && Equal(s[i:i+n], sep) {
return i
}
}
return -1
}
func LastIndexByte(s []byte, c byte) int
func LastIndexByte(s []byte, c byte) int {
for i := len(s) - 1; i >= 0; i-- {
if s[i] == c {
return i
}
}
return -1
}
func hashStrRev(sep []byte) (uint32, uint32) {
hash := uint32(0)
for i := len(sep) - 1; i >= 0; i-- {
hash = hash*primeRK + uint32(sep[i])
}
var pow, sq uint32 = 1, primeRK
for i := len(sep); i > 0; i >>= 1 {
if i&1 != 0 {
pow *= sq
}
sq *= sq
}
return hash, pow
}
const primeRK = 16777619
// Equal reports whether a and b
// are the same length and contain the same bytes.
// A nil argument is equivalent to an empty slice.
func Equal(a, b []byte) bool {
// Neither cmd/compile nor gccgo allocates for these string conversions.
return string(a) == string(b)
}
5.字符串替换
package main
import (
"fmt"
"strings"
)
func main() {
str := "hello world world"
res0 := strings.Replace(str, "world", "golang", 2)
res1 := strings.Replace(str, "world", "golang", 1)
//trings.Replace("原字符串", "被替换的内容", "替换的内容", 替换次数)
fmt.Printf("res0 is %v\n", res0)
fmt.Printf("res1 is %v\n", res1)
}
输出
res0 is hello golang golang
res1 is hello golang world
源码片段
// Replace returns a copy of the string s with the first n
// non-overlapping instances of old replaced by new.
// If old is empty, it matches at the beginning of the string
// and after each UTF-8 sequence, yielding up to k+1 replacements
// for a k-rune string.
// If n < 0, there is no limit on the number of replacements.
func Replace(s, old, new string, n int) string {
if old == new || n == 0 {
return s // avoid allocation
}
// Compute number of replacements.
if m := Count(s, old); m == 0 {
return s // avoid allocation
} else if n < 0 || m < n {
n = m
}
// Apply replacements to buffer.
t := make([]byte, len(s)+n*(len(new)-len(old)))
w := 0
start := 0
for i := 0; i < n; i++ {
j := start
if len(old) == 0 {
if i > 0 {
_, wid := utf8.DecodeRuneInString(s[start:])
j += wid
}
} else {
j += Index(s[start:], old)
}
w += copy(t[w:], s[start:j])
w += copy(t[w:], new)
start = j + len(old)
}
w += copy(t[w:], s[start:])
return string(t[0:w])
}
函数Count
// Count counts the number of non-overlapping instances of sep in s.
// If sep is an empty slice, Count returns 1 + the number of UTF-8-encoded code points in s.
func Count(s, sep []byte) int {
// special case
if len(sep) == 0 {
return utf8.RuneCount(s) + 1
}
if len(sep) == 1 {
return bytealg.Count(s, sep[0])
}
n := 0
for {
i := Index(s, sep)
if i == -1 {
return n
}
n++
s = s[i+len(sep):]
}
}
utf8包中的utf8.DecodeRuneInString
func DecodeRuneInString(s string) (r rune, size int) {
n := len(s)
if n < 1 {
return RuneError, 0
}
s0 := s[0]
x := first[s0]
if x >= as {
// The following code simulates an additional check for x == xx and
// handling the ASCII and invalid cases accordingly. This mask-and-or
// approach prevents an additional branch.
mask := rune(x) << 31 >> 31 // Create 0x0000 or 0xFFFF.
return rune(s[0])&^mask | RuneError&mask, 1
}
sz := int(x & 7)
accept := acceptRanges[x>>4]
if n < sz {
return RuneError, 1
}
s1 := s[1]
if s1 < accept.lo || accept.hi < s1 {
return RuneError, 1
}
if sz <= 2 { // <= instead of == to help the compiler eliminate some bounds checks
return rune(s0&mask2)<<6 | rune(s1&maskx), 2
}
s2 := s[2]
if s2 < locb || hicb < s2 {
return RuneError, 1
}
if sz <= 3 {
return rune(s0&mask3)<<12 | rune(s1&maskx)<<6 | rune(s2&maskx), 3
}
s3 := s[3]
if s3 < locb || hicb < s3 {
return RuneError, 1
}
return rune(s0&mask4)<<18 | rune(s1&maskx)<<12 | rune(s2&maskx)<<6 | rune(s3&maskx), 4
}
待续...
部分代码节选https://www.kancloud.cn/liupengjie/go/570004