1. 对于已知的编码:使用 golang.org/x/text 提供的解码器直接转换
依赖包:golang.org/x/text
import (
"fmt"
//_ "golang.org/x/text"
"golang.org/x/text/encoding/simplifiedchinese"
"golang.org/x/text/transform"
"io/ioutil"
"net/http"
)
// main fetches a page whose encoding is known in advance (GBK), transcodes
// the body to UTF-8 and prints it to standard output. Any failure panics.
func main() {
	// Request the page.
	response, err := http.Get("http://www.xxx.com")
	if err != nil {
		// A failed request yields a nil response, so stop before touching
		// response.Body.
		panic(err)
	}
	// Close the body once reading is done so the connection is released.
	defer response.Body.Close()

	// Wrap the body so GBK bytes are converted to UTF-8 as they are read.
	reader := transform.NewReader(response.Body, simplifiedchinese.GBK.NewDecoder())
	body, err := ioutil.ReadAll(reader)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%s", body)
}
2. 对于不确定的编码(例如 HTML 页面内容):先自动检测编码,再进行转换
依赖包:golang.org/x/net/html/charset
package main
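// The import list below was generated automatically by GoLand and is longer
// than necessary; imports that are not used may be removed.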
import (
	"bufio"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"net/http"

	_ "golang.org/x/net/html"
	"golang.org/x/net/html/charset"
	_ "golang.org/x/text"
	"golang.org/x/text/encoding"
	_ "golang.org/x/text/encoding/simplifiedchinese"
	"golang.org/x/text/encoding/unicode"
	"golang.org/x/text/transform"
)
// main fetches an HTML page whose encoding is not known in advance, detects
// the encoding from the leading bytes of the body, decodes the page with the
// detected encoding and prints it to standard output. Any failure panics.
func main() {
	// Request the page.
	response, err := http.Get("http://www.xxx.com")
	if err != nil {
		// A failed request yields a nil response, so stop before touching
		// response.Body.
		panic(err)
	}
	// Close the body once reading is done so the connection is released.
	defer response.Body.Close()

	// Buffer the body so the detector can peek at its leading bytes.
	bodyReader := bufio.NewReader(response.Body)
	enc := determinePageEncoding(bodyReader)

	// Decode from bodyReader, not response.Body: peeking has already moved the
	// first chunk of the body into the bufio buffer, so reading response.Body
	// directly would drop the beginning of the page.
	reader := transform.NewReader(bodyReader, enc.NewDecoder())
	body, err := ioutil.ReadAll(reader)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%s", body)
}
// determinePageEncoding detects the character encoding of an HTML page by
// inspecting up to the first 1024 bytes available on r, without consuming
// them.
//
// It returns unicode.UTF8 (after logging the error) when peeking fails for any
// reason other than io.EOF; otherwise it returns the encoding reported by
// charset.DetermineEncoding for the peeked bytes.
func determinePageEncoding(r *bufio.Reader) encoding.Encoding {
	// Peek is essential here: it only looks ahead and does not advance the
	// read position, so the caller can still read the whole page from r.
	head, err := r.Peek(1024)
	// A page shorter than 1024 bytes makes Peek return io.EOF together with
	// the bytes it did get; that is not a failure, so detection still runs on
	// what is available.
	if err != nil && err != io.EOF {
		log.Printf("Fetcher error: %v\n", err)
		return unicode.UTF8
	}
	e, _, _ := charset.DetermineEncoding(head, "")
	return e
}