// golang colly抓包实战 (hands-on web scraping with Go and colly)
package main
import (
"fmt"
"github.com/gocolly/colly"
"log"
"os"
"strings"
)
// main starts getList in a goroutine to stream article links over a buffered
// channel and fetches each article with getDetail as the links arrive.
func main() {
	// Buffered so the list crawler can run ahead of the detail fetches.
	ch := make(chan map[string]string, 20)
	go getList(ch)

	// Ranging over the channel blocks until a link is available and ends once
	// the channel is closed. A select with an empty default would spin the CPU,
	// and without leaving the loop on close it would keep receiving nil maps
	// from the closed channel forever.
	for msg := range ch {
		fmt.Println(msg)
		getDetail(msg)
	}
	fmt.Println("closed")
	fmt.Println("结束了")
}
// getList crawls the article index page and sends one map per article link
// (keys "href" and "title") on ch. ch is closed when the crawl is over,
// whether or not any link was found.
func getList(ch chan map[string]string) {
	// Close exactly once, after Visit has returned. Closing inside the OnHTML
	// callback panics when the selector matches more than one element, and is
	// never reached when the request fails or nothing matches, which would
	// leave the receiver blocked forever.
	// NOTE(review): relies on Visit being synchronous, which holds for a
	// collector that is not created with colly.Async.
	defer close(ch)

	c1 := colly.NewCollector(colly.UserAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"), colly.MaxDepth(1))
	// Collector 1: extract the article list.
	c1.OnHTML("div[id='list'] dl", func(e *colly.HTMLElement) {
		e.ForEach("dd", func(i int, item *colly.HTMLElement) {
			href := item.ChildAttr("a", "href")
			title := item.ChildText("a")
			// Hand the detail link over to the consumer.
			ch <- map[string]string{
				"href":  href,
				"title": title,
			}
		})
	})
	c1.OnRequest(func(r *colly.Request) {
		fmt.Println("c1爬取页面:", r.URL)
	})
	c1.OnError(func(r *colly.Response, err error) {
		fmt.Println("Request URL:", r.Request.URL, "failed with response:", r, "\nError:", err)
	})
	if err := c1.Visit(网站文章目录地址); err != nil {
		fmt.Println(err.Error())
	}
}
// getDetail fetches the article page addressed by data["href"] and stores the
// text of its div#content element through logRecord. data["title"] becomes
// the file name and the second element of href split on "/" becomes the
// directory name.
func getDetail(data map[string]string) {
	href := data["href"]
	if href == "" {
		// Nothing to fetch, e.g. a nil map received from a closed channel.
		return
	}

	c1 := colly.NewCollector(colly.UserAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"), colly.MaxDepth(1))
	// Collector: extract the article body.
	c1.OnHTML("div[id='content']", func(e *colly.HTMLElement) {
		detail := e.Text
		if len(detail) == 0 {
			return
		}
		// NOTE(review): the title used to be passed through
		// strings.ReplaceAll(title, ":", ":"), which replaces a colon with the
		// same colon and therefore changed nothing. Presumably the replacement
		// was meant to be a file-name-safe character; confirm the intended
		// character before reintroducing it.
		arr := strings.Split(href, "/")
		if len(arr) < 2 {
			// Without a "/" there is no directory element; indexing arr[1]
			// would panic.
			fmt.Println("skip detail, href has no directory segment:", href)
			return
		}
		logRecord(data["title"], arr[1], detail)
	})
	c1.OnRequest(func(r *colly.Request) {
		fmt.Println("c1爬取页面:", r.URL)
	})
	c1.OnError(func(r *colly.Response, err error) {
		fmt.Println("Request URL:", r.Request.URL, "failed with response:", r, "\nError:", err)
	})
	if err := c1.Visit(网站域名 + href); err != nil {
		fmt.Println(err.Error())
	}
}
// logRecord writes content to ./runtime/<dir>/<filename>.log, formatted like
// a line from the standard logger. An existing file is left untouched, so a
// given file name is written at most once. Failures are printed and the
// record is skipped.
func logRecord(filename string, dir string, content string) {
	path := "./runtime/" + dir + "/"
	// MkdirAll also creates ./runtime itself when it is missing and succeeds
	// when the directory is already there.
	if err := os.MkdirAll(path, os.ModePerm); err != nil {
		fmt.Println(err.Error())
		return
	}

	// Continue only when Stat reports that the file does not exist; an
	// existing file or any other Stat failure skips the write.
	fileInfo := path + filename + ".log"
	if _, err := os.Stat(fileInfo); !os.IsNotExist(err) {
		return
	}

	logFile, err := os.OpenFile(fileInfo, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0666)
	if err != nil {
		fmt.Println(err.Error())
		return
	}
	// Release the descriptor; one file is opened per stored article.
	defer func() {
		if err := logFile.Close(); err != nil {
			fmt.Println(err.Error())
		}
	}()

	// A private logger with the standard logger's prefix and flags keeps the
	// line format while leaving the process-wide log output where it was.
	logger := log.New(logFile, log.Prefix(), log.Flags())
	if err := logger.Output(2, content); err != nil {
		fmt.Println(err.Error())
	}
}