❝Python爬虫可能大家都玩腻了,那就玩一下Golang的爬虫吧!
❞
原图.xmind
Go爬虫
使用 net/http 包构造请求(request)并获取响应(response)
构造客户端
// client is the HTTP client used to send the requests built below.
// NOTE(review): the zero-value http.Client has no Timeout, so a stalled server
// can block a request indefinitely; consider setting one.
var client http.Client
构造GET请求:
// Build a GET request for URL; the body is nil because nothing is sent with it.
reqList, err := http.NewRequest("GET", URL, nil)
if err != nil {
	// NewRequest fails when the method is invalid or URL cannot be parsed.
	panic(err)
}
构造POST请求
使用 cookiejar.New 创建 Cookie 容器(Jar),用来存储标识用户身份的 cookie
// Create an in-memory cookie jar; passing nil options selects the defaults.
jar, err := cookiejar.New(nil)
if err != nil {
	panic(err)
}
构造 POST 请求并指定 URL
var client http.Client Info :="muser="+muserid+"&"+"passwd="+password var data = strings.NewReader(Info) req, err := http.NewRequest("POST", URL, data)
添加请求头
// Imitate the headers a desktop Chrome browser sends.
req.Header.Set("Connection", "keep-alive")
req.Header.Set("Pragma", "no-cache")
req.Header.Set("Cache-Control", "no-cache")
req.Header.Set("Upgrade-Insecure-Requests", "1")
// The request body built for the POST is form-encoded, so declare it as such.
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36")
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9")
发送请求
// Send the request.
resp, err := client.Do(req)
if err != nil {
	// On error resp is nil, so it must not be dereferenced.
	panic(err)
}
// The body must be closed so the underlying connection can be reused.
defer resp.Body.Close()

// Read the whole response body into memory.
bodyText, err := ioutil.ReadAll(resp.Body)
if err != nil {
	panic(err)
}
「关于cookie」
服务器返回的 cookie 保存在 client.Jar 中
// Dump the client's cookie jar as text.
// NOTE(review): client.Jar is an http.CookieJar interface value; formatting it
// with %s prints the jar's internal representation rather than cookie values.
// client.Jar.Cookies(u) returns the cookies for a URL directly — consider it.
myStr:=fmt.Sprintf("%s",client.Jar) // force-format the jar (a pointer) into a string
从 client.Jar 中取出 cookie 后,手动设置到请求头中
// Send the session cookie explicitly; Header.Set replaces any Cookie header
// already present on req.
// NOTE(review): cook is presumably the ASP.NET session id extracted from the
// jar dump — confirm against the code that assigns it.
req.Header.Set("Cookie", "ASP.NET_SessionId="+cook)
至此,发送请求部分就完全完成了!
2.1 CSS选择器
使用 github.com/PuerkitoBio/goquery 的 NewDocumentFromReader 解析响应内容
// Parse the response body into a goquery document for CSS-selector queries.
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil {
	panic(err)
}
2.2 Xpath 语法
使用 github.com/antchfx/htmlquery 的 Parse 解析响应内容
// Parse the response body into an HTML node tree for XPath queries.
root, err := htmlquery.Parse(resp.Body)
if err != nil {
	// Stop here instead of handing an unusable root to later queries.
	panic(err)
}
2.3 Regex 正则
// Regular-expression match. The pattern is a constant, so MustCompile is used
// instead of discarding the error from Compile.
reId := regexp.MustCompile(`id=(\d+)`)

// Ask for at most one match in the page body.
allId := reId.FindAll(bodyText, 1)
for _, item := range allId {
	// NOTE(review): item is the whole match (e.g. "id=123"), not just the
	// captured digits; use FindSubmatch if only the number is wanted.
	id = string(item)
}
3.1 CSS 选择器
在 doc 上使用 CSS 选择器语法查找元素
// Extract the article metadata from the detail container. Each runs the
// callback once per matched element, so a later match overwrites the fields
// written by an earlier one.
doc.Find("#main > div.right > div.detail_main_content").
	Each(func(i int, s *goquery.Selection) {
		Data.title = s.Find("p").Text()
		Data.time = s.Find("#fbsj").Text()
		Data.author = s.Find("#author").Text()
		Data.count = Read_Count(Read_Id)
		fmt.Println(Data.title, Data.time, Data.author, Data.count)
	})

// Extract the article body: the concatenated text of every <p> under the
// content container.
doc.Find("#news_content_display").Each(func(i int, s *goquery.Selection) {
	Data.content = s.Find("p").Text()
	fmt.Println(Data.content)
})
3.2 Xpath 语法
在 root 上编写 XPath 语法查找结点
// Use XPath to collect every <td> under the element with id "LB_kb".
tr := htmlquery.Find(root, "//*[@id='LB_kb']/table/tbody/tr/td")
for _, row := range tr { // the original author observed len(tr) == 13
	classNames := htmlquery.Find(row, "./font")
	classPositions := htmlquery.Find(row, "./text()[4]")
	classTeachers := htmlquery.Find(row, "./text()[5]")

	// Skip cells missing any of the three parts; indexing [0] on an empty
	// result would panic.
	if len(classNames) == 0 || len(classPositions) == 0 || len(classTeachers) == 0 {
		continue
	}

	className = htmlquery.InnerText(classNames[0])
	classPosistion = htmlquery.InnerText(classPositions[0])
	classTeacher = htmlquery.InnerText(classTeachers[0])
	fmt.Println(className)
	fmt.Println(classPosistion)
	fmt.Println(classTeacher)
}
4.1 使用原生SQL语句把数据保存到Mysql中
定义数据库链接参数
// Connection parameters for the MySQL database that stores the class data.
const (
	usernameClass = "root"      // database user
	passwordClass = "root"      // database password
	ipClass       = "127.0.0.1" // server host
	portClass     = "3306"      // server port
	dbnameClass   = "class"     // database (schema) name
)
连接数据库
var DB *sql.DBfunc InitDB(){ path := strings.Join([]string{usernameClass, ":", passwordClass, "@tcp(", ipClass, ":", portClass, ")/", dbnameClass, "?charset=utf8"}, "") DB, _ = sql.Open("mysql", path) DB.SetConnMaxLifetime(10) DB.SetMaxIdleConns(5) if err := DB.Ping(); err != nil{ fmt.Println("opon database fail") return } fmt.Println("connect success")}
定义数据类型
// Class is one scraped timetable entry; InsertData writes it to the
// class_data table.
type Class struct {
	classData   string // stored in the `class` column
	teacherName string // stored in the `teacher` column
	position    string // stored in the `position` column
}
插入数据
func InsertData(Data Class) bool { tx, err := DB.Begin() if err != nil{ fmt.Println("tx fail") return false } stmt, err := tx.Prepare("INSERT INTO class_data (`class`,`teacher`,`position`) VALUES (?, ?, ?)") if err != nil{ // 数据的插入 fmt.Println("Prepare fail",err) return false } _, err = stmt.Exec(Data.classData,Data.teacherName,Data.position) //执行事务 if err != nil{ fmt.Println("Exec fail",err) return false } _ = tx.Commit() // 提交事务 return true}
4.2 使用GORM把数据保存到Mysql中
构造GORM模型model
type NewD struct { gorm.Model Title string `gorm:"type:varchar(255);not null;"` Time string `gorm:"type:varchar(256);not null;"` Author string `gorm:"type:varchar(256);not null;"` Count string `gorm:"type:varchar(256);not null;"` Content string `gorm:"type:longtext;not null;"`}
连接数据库
var db *gorm.DBfunc Init() { var err error path := strings.Join([]string{userName_New, ":", password_New, "@tcp(",ip_New, ":", port_New, ")/", dbName_New, "?charset=utf8"}, "") db, err = gorm.Open("mysql", path) if err != nil { panic(err) } fmt.Println("SUCCESS") _ = db.AutoMigrate(&NewD{}) sqlDB := db.DB() sqlDB.SetMaxIdleConns(10) sqlDB.SetMaxOpenConns(100)}
写入数据
// Copy the scraped fields into the GORM model.
NewA := NewD{
	Title:   Data.title,
	Time:    Data.time,
	Author:  Data.author,
	Count:   Data.count,
	Content: Data.content,
}

// Create one row in the database and surface any failure instead of
// dropping it silently.
err = db.Create(&NewA).Error
if err != nil {
	fmt.Println("Create fail", err)
}
期待你的关注~