优化爬虫执行逻辑

This commit is contained in:
fantasticbin 2024-04-04 08:51:19 +08:00
parent 4c8838846a
commit 479b1c7cd7
2 changed files with 5 additions and 7 deletions

View File

@@ -47,7 +47,7 @@ func Execute() {
} }
// FindChapterTitle 解析文件夹标题 // FindChapterTitle 解析文件夹标题
func FindChapterTitle(url string, num int) string { func FindChapterTitle(url string) string {
resp, err := http.Get(url) resp, err := http.Get(url)
if err != nil { if err != nil {
fmt.Println("请求文件夹标题失败:", err) fmt.Println("请求文件夹标题失败:", err)
@@ -68,9 +68,8 @@ func FindChapterTitle(url string, num int) string {
return "" return ""
} }
content := string(body) re := regexp.MustCompile(`\\u7b2c` + strconv.Itoa(chapter) + `\\u8bdd (.+?)"`)
re := regexp.MustCompile(`\\u7b2c` + strconv.Itoa(num) + `\\u8bdd (.+?)"`) matches := re.FindAllStringSubmatch(string(body), -1)
matches := re.FindAllStringSubmatch(content, -1)
if len(matches) == 0 { if len(matches) == 0 {
return "" return ""
@@ -84,7 +83,6 @@ func FindChapterTitle(url string, num int) string {
replacements := map[string]string{ replacements := map[string]string{
"?": "", "?": "",
":": "", ":": "",
"!": "",
} }
unquoted = strings.TrimSpace(unquoted) unquoted = strings.TrimSpace(unquoted)
for old, re := range replacements { for old, re := range replacements {

View File

@@ -13,8 +13,8 @@ import (
) )
type Crawler struct { type Crawler struct {
wg sync.WaitGroup
path string path string
wg sync.WaitGroup
} }
func NewCrawler() *Crawler { func NewCrawler() *Crawler {
@@ -24,7 +24,7 @@ func NewCrawler() *Crawler {
func (c *Crawler) Start() { func (c *Crawler) Start() {
c.path = output c.path = output
if folderTitleUrl != "" { if folderTitleUrl != "" {
chapterTitle := FindChapterTitle(folderTitleUrl, chapter) chapterTitle := FindChapterTitle(folderTitleUrl)
title := strings.Join([]string{ title := strings.Join([]string{
"第", "第",
strconv.Itoa(chapter), strconv.Itoa(chapter),