golang 使用线程池进行高并发爬虫

gospider 介绍

gospider 是一个 golang 爬虫神器，拥有从 python 爬虫过渡到 golang 爬虫所必需的全部库，用于帮助 python 爬虫从业者快速且无坑地过渡到 golang。

安装

go get -u gitee.com/baixudong/gospider

gitee地址

https://gitee.com/baixudong/gospider

github地址

https://github.com/baixudong007/gospider

代码示例

package main

import (
    "context"
    "log"
    "net/url"

    "gitee.com/baixudong/gospider/requests"
    "gitee.com/baixudong/gospider/thread"
)

// reqCli is the package-level request client. It is assigned once in init
// and used by test to issue every request.
var reqCli *requests.Client

// init builds the shared request client stored in reqCli.
// If the client cannot be created, the error is logged and the program panics.
func init() {
    // Proxy callback handed to the client via ClientOption.GetProxy.
    // This example always returns an empty proxy string and no error.
    // NOTE(review): presumably an empty string means "no proxy" — confirm
    // against the gospider requests documentation.
    proxyFn := func(_ context.Context, _ *url.URL) (string, error) {
        return "", nil
    }

    var err error
    if reqCli, err = requests.NewClient(nil, requests.ClientOption{GetProxy: proxyFn}); err != nil {
        log.Panic(err)
    }
}
// test issues a single GET request to http://myip.top through the shared
// reqCli client and logs the start, the outcome, and the end of the request.
//
// ctx is forwarded to the request; num is only used to label the log lines.
func test(ctx context.Context, num int) {
    log.Printf("第%d个请求开始", num)
    if resp, err := reqCli.Request(ctx, "get", "http://myip.top"); err == nil {
        // Success: report the status code of the response.
        log.Printf("第%d个请求成功%d", num, resp.StatusCode())
    } else {
        // Failure: report the error text.
        log.Printf("第%d个请求失败%s", num, err.Error())
    }
    log.Printf("第%d个请求结束", num)
}
// main submits ten test tasks to a gospider thread client whose concurrency
// is limited to three, then calls Join on the client before returning.
func main() {
    const (
        maxConcurrency = 3  // concurrency limit passed to the thread client
        taskCount      = 10 // number of tasks to submit
    )

    pool := thread.NewClient(nil, maxConcurrency)
    for n := 0; n < taskCount; n++ {
        // Submit one task per index; the index is passed as the task argument.
        // NOTE(review): presumably the client calls Func with a context plus
        // the values in Args — confirm against the thread package.
        task := &thread.Task{
            Func: test,
            Args: []any{n},
        }
        pool.Write(task)
    }
    // NOTE(review): Join presumably blocks until all submitted tasks finish — confirm.
    pool.Join()
}