优化爬虫执行逻辑
This commit is contained in:
parent
4c8838846a
commit
479b1c7cd7
@ -47,7 +47,7 @@ func Execute() {
|
||||
}
|
||||
|
||||
// FindChapterTitle 解析文件夹标题
|
||||
func FindChapterTitle(url string, num int) string {
|
||||
func FindChapterTitle(url string) string {
|
||||
resp, err := http.Get(url)
|
||||
if err != nil {
|
||||
fmt.Println("请求文件夹标题失败:", err)
|
||||
@ -68,9 +68,8 @@ func FindChapterTitle(url string, num int) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
content := string(body)
|
||||
re := regexp.MustCompile(`\\u7b2c` + strconv.Itoa(num) + `\\u8bdd (.+?)"`)
|
||||
matches := re.FindAllStringSubmatch(content, -1)
|
||||
re := regexp.MustCompile(`\\u7b2c` + strconv.Itoa(chapter) + `\\u8bdd (.+?)"`)
|
||||
matches := re.FindAllStringSubmatch(string(body), -1)
|
||||
|
||||
if len(matches) == 0 {
|
||||
return ""
|
||||
@ -84,7 +83,6 @@ func FindChapterTitle(url string, num int) string {
|
||||
replacements := map[string]string{
|
||||
"?": "?",
|
||||
":": ":",
|
||||
"!": "!",
|
||||
}
|
||||
unquoted = strings.TrimSpace(unquoted)
|
||||
for old, re := range replacements {
|
||||
|
@ -13,8 +13,8 @@ import (
|
||||
)
|
||||
|
||||
type Crawler struct {
|
||||
wg sync.WaitGroup
|
||||
path string
|
||||
wg sync.WaitGroup
|
||||
}
|
||||
|
||||
func NewCrawler() *Crawler {
|
||||
@ -24,7 +24,7 @@ func NewCrawler() *Crawler {
|
||||
func (c *Crawler) Start() {
|
||||
c.path = output
|
||||
if folderTitleUrl != "" {
|
||||
chapterTitle := FindChapterTitle(folderTitleUrl, chapter)
|
||||
chapterTitle := FindChapterTitle(folderTitleUrl)
|
||||
title := strings.Join([]string{
|
||||
"第",
|
||||
strconv.Itoa(chapter),
|
||||
|
Loading…
Reference in New Issue
Block a user