优化爬虫执行逻辑
This commit is contained in:
parent
4c8838846a
commit
479b1c7cd7
@ -47,7 +47,7 @@ func Execute() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// FindChapterTitle 解析文件夹标题
|
// FindChapterTitle 解析文件夹标题
|
||||||
func FindChapterTitle(url string, num int) string {
|
func FindChapterTitle(url string) string {
|
||||||
resp, err := http.Get(url)
|
resp, err := http.Get(url)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Println("请求文件夹标题失败:", err)
|
fmt.Println("请求文件夹标题失败:", err)
|
||||||
@ -68,9 +68,8 @@ func FindChapterTitle(url string, num int) string {
|
|||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
content := string(body)
|
re := regexp.MustCompile(`\\u7b2c` + strconv.Itoa(chapter) + `\\u8bdd (.+?)"`)
|
||||||
re := regexp.MustCompile(`\\u7b2c` + strconv.Itoa(num) + `\\u8bdd (.+?)"`)
|
matches := re.FindAllStringSubmatch(string(body), -1)
|
||||||
matches := re.FindAllStringSubmatch(content, -1)
|
|
||||||
|
|
||||||
if len(matches) == 0 {
|
if len(matches) == 0 {
|
||||||
return ""
|
return ""
|
||||||
@ -84,7 +83,6 @@ func FindChapterTitle(url string, num int) string {
|
|||||||
replacements := map[string]string{
|
replacements := map[string]string{
|
||||||
"?": "?",
|
"?": "?",
|
||||||
":": ":",
|
":": ":",
|
||||||
"!": "!",
|
|
||||||
}
|
}
|
||||||
unquoted = strings.TrimSpace(unquoted)
|
unquoted = strings.TrimSpace(unquoted)
|
||||||
for old, re := range replacements {
|
for old, re := range replacements {
|
||||||
|
@ -13,8 +13,8 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
type Crawler struct {
|
type Crawler struct {
|
||||||
wg sync.WaitGroup
|
|
||||||
path string
|
path string
|
||||||
|
wg sync.WaitGroup
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewCrawler() *Crawler {
|
func NewCrawler() *Crawler {
|
||||||
@ -24,7 +24,7 @@ func NewCrawler() *Crawler {
|
|||||||
func (c *Crawler) Start() {
|
func (c *Crawler) Start() {
|
||||||
c.path = output
|
c.path = output
|
||||||
if folderTitleUrl != "" {
|
if folderTitleUrl != "" {
|
||||||
chapterTitle := FindChapterTitle(folderTitleUrl, chapter)
|
chapterTitle := FindChapterTitle(folderTitleUrl)
|
||||||
title := strings.Join([]string{
|
title := strings.Join([]string{
|
||||||
"第",
|
"第",
|
||||||
strconv.Itoa(chapter),
|
strconv.Itoa(chapter),
|
||||||
|
Loading…
Reference in New Issue
Block a user