优化爬虫执行逻辑

This commit is contained in:
fantasticbin 2024-04-04 08:51:19 +08:00
parent 4c8838846a
commit 479b1c7cd7
2 changed files with 5 additions and 7 deletions

View File

@@ -47,7 +47,7 @@ func Execute() {
}
// FindChapterTitle 解析文件夹标题
func FindChapterTitle(url string, num int) string {
func FindChapterTitle(url string) string {
resp, err := http.Get(url)
if err != nil {
fmt.Println("请求文件夹标题失败:", err)
@@ -68,9 +68,8 @@ func FindChapterTitle(url string, num int) string {
return ""
}
content := string(body)
re := regexp.MustCompile(`\\u7b2c` + strconv.Itoa(num) + `\\u8bdd (.+?)"`)
matches := re.FindAllStringSubmatch(content, -1)
re := regexp.MustCompile(`\\u7b2c` + strconv.Itoa(chapter) + `\\u8bdd (.+?)"`)
matches := re.FindAllStringSubmatch(string(body), -1)
if len(matches) == 0 {
return ""
@@ -84,7 +83,6 @@ func FindChapterTitle(url string, num int) string {
replacements := map[string]string{
"?": "",
":": "",
"!": "",
}
unquoted = strings.TrimSpace(unquoted)
for old, re := range replacements {

View File

@@ -13,8 +13,8 @@ import (
)
type Crawler struct {
wg sync.WaitGroup
path string
wg sync.WaitGroup
}
func NewCrawler() *Crawler {
@@ -24,7 +24,7 @@ func NewCrawler() *Crawler {
func (c *Crawler) Start() {
c.path = output
if folderTitleUrl != "" {
chapterTitle := FindChapterTitle(folderTitleUrl, chapter)
chapterTitle := FindChapterTitle(folderTitleUrl)
title := strings.Join([]string{
"第",
strconv.Itoa(chapter),