优化爬虫执行逻辑

master
fantasticbin 6 months ago
parent ef7891de9c
commit 4c8838846a

@ -9,7 +9,7 @@ var rootCmd = &cobra.Command{
Use: "anime", Use: "anime",
Short: "Anime crawler written by go", Short: "Anime crawler written by go",
Run: func(cmd *cobra.Command, args []string) { Run: func(cmd *cobra.Command, args []string) {
start() NewCrawler().Start()
fmt.Println("抓取完成") fmt.Println("抓取完成")
}, },
} }

@ -12,11 +12,17 @@ import (
"sync" "sync"
) )
var wg sync.WaitGroup type Crawler struct {
wg sync.WaitGroup
path string
}
func NewCrawler() *Crawler {
return &Crawler{}
}
// start 开始执行 func (c *Crawler) Start() {
func start() { c.path = output
path := output
if folderTitleUrl != "" { if folderTitleUrl != "" {
chapterTitle := FindChapterTitle(folderTitleUrl, chapter) chapterTitle := FindChapterTitle(folderTitleUrl, chapter)
title := strings.Join([]string{ title := strings.Join([]string{
@ -25,26 +31,25 @@ func start() {
"话-", "话-",
chapterTitle, chapterTitle,
}, "") }, "")
path = filepath.Join(output, title) // 组装章节路径 c.path = filepath.Join(output, title) // 组装章节路径
} }
err := IfPathNotExistDoMkdir(path) err := IfPathNotExistDoMkdir(c.path)
if err != nil { if err != nil {
fmt.Println("输出目录创建失败:", err) fmt.Println("输出目录创建失败:", err)
return return
} }
c.wg.Add(max)
for i := 1; i <= max; i++ { for i := 1; i <= max; i++ {
wg.Add(1) go c.do(i)
go get(i, path)
} }
wg.Wait() c.wg.Wait()
} }
// get 获取漫画图片 func (c *Crawler) do(num int) {
func get(num int, path string) { defer c.wg.Done()
defer wg.Done()
// 兼容未携带斜杠的地址 // 兼容未携带斜杠的地址
if url[0] != '/' { if url[0] != '/' {
@ -88,7 +93,7 @@ func get(num int, path string) {
}(resp.Body) }(resp.Body)
reader := bufio.NewReaderSize(resp.Body, 32*1024) reader := bufio.NewReaderSize(resp.Body, 32*1024)
file, err := os.Create(path + "/" + fileName) file, err := os.Create(c.path + "/" + fileName)
if err != nil { if err != nil {
fmt.Println(fileName, "图片创建失败:", err) fmt.Println(fileName, "图片创建失败:", err)
return return

Loading…
Cancel
Save