package main import ( "fmt" "github.com/gocolly/colly" "log" "os" "strings" ) func main() { var ch chan map[string]string ch = make(chan map[string]string, 20); go getList(ch) for { select { case msg, ok:= <-ch: if !ok { fmt.Println("closed") //break end } fmt.Println(msg) getDetail(msg) default: } } fmt.Println("结束了") } //获取文章目录 func getList(ch chan map[string]string) { c1 := colly.NewCollector(colly.UserAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"), colly.MaxDepth(1)) //采集器1,获取文章列表 c1.OnHTML("div[id='list'] dl", func(e *colly.HTMLElement) { e.ForEach("dd", func(i int, item *colly.HTMLElement) { href := item.ChildAttr("a", "href") title := item.ChildText("a") ctx := colly.NewContext() ctx.Put("href", href) ctx.Put("title", title) //将详情连接加入队列 ch<- map[string]string{ "href":href, "title":title, } }) close(ch) }) c1.OnRequest(func(r *colly.Request) { fmt.Println("c1爬取页面:", r.URL) }) c1.OnError(func(r *colly.Response, err error) { fmt.Println("Request URL:", r.Request.URL, "failed with response:", r, "\nError:", err) }) err := c1.Visit(网站文章目录地址) if err != nil { fmt.Println(err.Error()) } } //获取详情 func getDetail(data map[string]string) { c1 := colly.NewCollector(colly.UserAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"), colly.MaxDepth(1)) //采集器1,获取文章列表 c1.OnHTML("div[id='content']", func(e *colly.HTMLElement) { detail := e.Text if len(detail) > 0 { if strings.Index(data["title"],":") > 0 { data["title"] = strings.ReplaceAll(data["title"], ":",":") } arr := strings.Split(data["href"],"/") logRecord(data["title"],arr[1], detail) } }) c1.OnRequest(func(r *colly.Request) { fmt.Println("c1爬取页面:", r.URL) }) c1.OnError(func(r *colly.Response, err error) { fmt.Println("Request URL:", r.Request.URL, "failed with response:", r, "\nError:", err) }) err := c1.Visit(网站域名+data["href"]) if err != nil { fmt.Println(err.Error()) } } //日志记录文件 
// logRecord stores content in ./runtime/<dir>/<filename>.log as a single
// entry formatted with the standard logger's current prefix and flags.
//
// Missing directories, including ./runtime itself, are created. If the target
// file already exists, or its state cannot be determined, nothing is written,
// so content saved earlier is never appended to or overwritten. Failures are
// printed and the entry is skipped instead of aborting the program.
func logRecord(filename string, dir string, content string) {
	path := "./runtime/" + dir + "/"
	// MkdirAll creates missing parents and succeeds when the directory is
	// already there, so no separate existence check is needed.
	if err := os.MkdirAll(path, os.ModePerm); err != nil {
		fmt.Println(err.Error())
		return
	}

	// Only a definite "does not exist" result lets the write proceed.
	fileInfo := path + filename + ".log"
	if _, err := os.Stat(fileInfo); !os.IsNotExist(err) {
		return
	}

	logFile, err := os.OpenFile(fileInfo, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0666)
	if err != nil {
		fmt.Println(err.Error())
		return
	}
	// Release the descriptor on every call; a Close error can mean the data
	// never reached the disk, so it is reported.
	defer func() {
		if err := logFile.Close(); err != nil {
			fmt.Println(err.Error())
		}
	}()

	// A dedicated logger yields the same entry format without redirecting the
	// process-wide logger to a file that is about to be closed.
	log.New(logFile, log.Prefix(), log.Flags()).Print(content)
}