package main
import (
"encoding/json"
"fmt"
"regexp"
"github.com/dop251/goja"
"github.com/gocolly/colly"
)
// token holds the value most recently captured from the translate home page.
// It keeps its zero value (the empty string) until main's response handler
// assigns it.
var token string
// CallJsCode runs the embedded JavaScript signing routine on keyword and
// returns the script's result converted to a float64.
//
// The script's entry point e(r) derives a value p from the UTF-8 bytes of r and
// the hard-coded seed i, and returns the string p + "." + (p ^ m), where m is
// the integer part of the seed. That string is what the Go gateway converts to
// a number.
//
// CallJsCode prints a diagnostic and returns 0 when the script fails to load,
// when e cannot be mapped onto a Go function, or when e throws while running.
//
// NOTE(review): the JS result is a string that is forced through float64 here.
// A caller that formats the number back to text can lose trailing zeros of the
// fractional part; confirm whether the consumer needs the exact string.
func CallJsCode(keyword string) float64 {
	// The helper a() is called by e() on the surrogate-pair branch but was
	// missing from the script, so any keyword containing such a pair raised a
	// ReferenceError. It is defined here as a plain array copy, which is all the
	// call site needs: its result is spread into f via push.apply.
	const script = `
var i = "320305.131321201"
function n(r, o) {
for (var t = 0; t < o.length - 2; t += 3) {
var a = o.charAt(t + 2);
a = a >= "a" ? a.charCodeAt(0) - 87 : Number(a), a = "+" === o.charAt(t + 1) ? r >>> a : r << a, r = "+" === o.charAt(t) ? r + a & 4294967295 : r ^ a
}
return r
}
function a(r) {
for (var o = 0, t = Array(r.length); o < r.length; o++) t[o] = r[o];
return t
}
function e(r) {
var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
if (null === o) {
var t = r.length;
t > 30 && (r = "" + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr(-10, 10))
} else {
for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = []; h > C; C++) "" !== e[C] && f.push.apply(f, a(e[C].split(""))), C !== h - 1 && f.push(o[C]);
var g = f.length;
g > 30 && (r = f.slice(0, 10).join("") + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join("") + f.slice(-10).join(""))
}
var u = void 0, l = "" + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107);
u = null !== i ? i : (i = window[l] || "") || "";
for (var d = u.split("."), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [], c = 0, v = 0; v < r.length; v++) {
var A = r.charCodeAt(v);
128 > A ? S[c++] = A : (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)), S[c++] = A >> 18 | 240, S[c++] = A >> 12 & 63 | 128) : S[c++] = A >> 12 | 224, S[c++] = A >> 6 & 63 | 128), S[c++] = 63 & A | 128)
}
for (var p = m, F = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ("" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++) p += S[b], p = n(p, F);
return p = n(p, D), p ^= s, 0 > p && (p = (2147483647 & p) + 2147483648), p %= 1e6, p.toString() + "." + (p ^ m)
}
`
	vm := goja.New()
	if _, err := vm.RunString(script); err != nil {
		fmt.Println("JS代码有问题!", err)
		return 0
	}

	// Declaring error as the last result makes the goja gateway hand JavaScript
	// exceptions back as a Go error; without it the gateway panics.
	var fn func(string) (float64, error)
	if err := vm.ExportTo(vm.Get("e"), &fn); err != nil {
		fmt.Println("Js函数映射到 Go 函数失败!", err)
		return 0
	}

	sign, err := fn(keyword)
	if err != nil {
		fmt.Println("JS代码有问题!", err)
		return 0
	}
	return sign
}
// Wire types for the JSON body that main decodes in its final response branch.
type (
	// Result is the top-level envelope; only its "trans_result" member is
	// decoded.
	Result struct {
		TransResult Trans `json:"trans_result"`
	}

	// Trans is the "trans_result" object: a list of entries under "data" plus
	// the "from" and "to" strings that accompany them.
	Trans struct {
		Data []TransData `json:"data"`
		From string      `json:"from"`
		To   string      `json:"to"`
	}

	// TransData is one entry of Trans.Data, carrying the "dst" and "src"
	// strings (presumably the translated and the source text).
	TransData struct {
		Dst string `json:"dst"`
		Src string `json:"src"`
	}
)
// main sends a fixed English phrase to fanyi.baidu.com for translation into
// Chinese and prints the decoded response.
//
// The collector walks three endpoints in order, each response triggering the
// next request:
//
//  1. GET /langdetect
//  2. POST / (the home page, whose body carries the token)
//  3. POST /v2transapi with the signed translation form
//
// Any response from a URL other than the first two is treated as the
// translation result and decoded as JSON.
func main() {
	const (
		detectURL = "https://fanyi.baidu.com/langdetect"
		homeURL   = "https://fanyi.baidu.com/"
		transURL  = "https://fanyi.baidu.com/v2transapi?from=en&to=zh"
	)

	keyword := "hello world!!!"
	// Compiled once here rather than on every response.
	tokenRe := regexp.MustCompile(`token: '(.*?)'`)

	c := colly.NewCollector()

	c.OnRequest(func(r *colly.Request) {
		r.Headers.Set("User-Agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36")
		r.Headers.Set("x-requested-with", "XMLHttpRequest")
		r.Headers.Set("origin", "https://fanyi.baidu.com")
		r.Headers.Set("referer", "https://fanyi.baidu.com/?aldtype=16047")
		fmt.Println("Visiting", r.URL.String())
	})

	c.OnResponse(func(r *colly.Response) {
		switch r.Request.URL.String() {
		case detectURL:
			if err := c.Post(homeURL, map[string]string{"query": keyword}); err != nil {
				fmt.Println("post", homeURL, "failed:", err)
			}

		case homeURL:
			// A page without the token pattern yields a nil match; indexing it
			// unconditionally would panic.
			match := tokenRe.FindSubmatch(r.Body)
			if len(match) < 2 {
				fmt.Println("token not found in response from", homeURL)
				return
			}
			token = string(match[1])

			form := map[string]string{
				"from":              "en",
				"to":                "zh",
				"query":             keyword,
				"transtype":         "realtime",
				"simple_means_flag": "3",
				"sign":              fmt.Sprint(CallJsCode(keyword)),
				"token":             token,
				"domain":            "common",
			}
			if err := c.Post(transURL, form); err != nil {
				fmt.Println("post", transURL, "failed:", err)
			}

		default:
			var res Result
			if err := json.Unmarshal(r.Body, &res); err != nil {
				fmt.Println("decoding translation response failed:", err)
				return
			}
			fmt.Printf("%+v\n", res)
		}
	})

	if err := c.Visit(detectURL); err != nil {
		fmt.Println("visit", detectURL, "failed:", err)
	}
}
// go的爬虫工具教你如何去翻译(go调用js,colly的使用)
// go的爬虫工具教你如何去翻译(go调用js,colly的使用)
// 分析过程
// go代码