优化封面获取细节，并复用请求
This commit is contained in:
		
							parent
							
								
									6e264ae8af
								
							
						
					
					
						commit
						185a1bad1c
					
				
							
								
								
									
										48
									
								
								crawler.go
									
									
									
									
									
								
							
							
						
						
									
										48
									
								
								crawler.go
									
									
									
									
									
								
							| @ -2,6 +2,7 @@ package main | |||||||
| 
 | 
 | ||||||
| import ( | import ( | ||||||
| 	"bytes" | 	"bytes" | ||||||
|  | 	"context" | ||||||
| 	"errors" | 	"errors" | ||||||
| 	"fmt" | 	"fmt" | ||||||
| 	"github.com/spf13/viper" | 	"github.com/spf13/viper" | ||||||
| @ -15,6 +16,7 @@ import ( | |||||||
| 	"path/filepath" | 	"path/filepath" | ||||||
| 	"strconv" | 	"strconv" | ||||||
| 	"strings" | 	"strings" | ||||||
|  | 	"time" | ||||||
| 	"unicode" | 	"unicode" | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| @ -22,6 +24,7 @@ type Crawler struct { | |||||||
| 	avPath     string | 	avPath     string | ||||||
| 	outputPath string | 	outputPath string | ||||||
| 	config     *viper.Viper | 	config     *viper.Viper | ||||||
|  | 	client     *http.Client | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func NewCrawler(avPath, outputPath string) *Crawler { | func NewCrawler(avPath, outputPath string) *Crawler { | ||||||
| @ -38,6 +41,14 @@ func NewCrawler(avPath, outputPath string) *Crawler { | |||||||
| 		avPath:     avPath, | 		avPath:     avPath, | ||||||
| 		outputPath: outputPath, | 		outputPath: outputPath, | ||||||
| 		config:     config, | 		config:     config, | ||||||
|  | 		client: &http.Client{ | ||||||
|  | 			Timeout: 15 * time.Second, | ||||||
|  | 			Transport: &http.Transport{ | ||||||
|  | 				MaxIdleConns:       10, | ||||||
|  | 				IdleConnTimeout:    30 * time.Second, | ||||||
|  | 				DisableCompression: true, | ||||||
|  | 			}, | ||||||
|  | 		}, | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -119,31 +130,50 @@ func (c *Crawler) getCoverCodeList(files []string) (coverList []string) { | |||||||
| func (c *Crawler) fetchCoverImg(code string) error { | func (c *Crawler) fetchCoverImg(code string) error { | ||||||
| 	imgUrl := strings.ReplaceAll(c.config.GetString("crawler.url"), `*`, code) | 	imgUrl := strings.ReplaceAll(c.config.GetString("crawler.url"), `*`, code) | ||||||
| 	suffix := filepath.Ext(imgUrl) | 	suffix := filepath.Ext(imgUrl) | ||||||
| 	nameSlice := strings.Split(code, "00") |  | ||||||
| 
 | 
 | ||||||
| 	if len(nameSlice) < 2 { | 	offset := 0 | ||||||
|  | 	// 如果第一个字符为 '1',则从下一个字符开始查找
 | ||||||
|  | 	if len(code) > 0 && code[0] == '1' { | ||||||
|  | 		offset = 1 | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	// 获取号码所在的位置
 | ||||||
|  | 	index := strings.Index(code[offset:], "00") | ||||||
|  | 	if index == -1 { | ||||||
| 		return nil | 		return nil | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if len(nameSlice[0]) > 4 { | 	// 分隔字母部分及数字部分
 | ||||||
| 		nameSlice[0] = nameSlice[0][1:] | 	letters := code[:index] | ||||||
|  | 	num := code[index+2:] | ||||||
|  | 	if offset > 0 { | ||||||
|  | 		letters = code[1:index] | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	fileName := filepath.Join(c.outputPath, fmt.Sprintf("%s-%s%s", | 	fileName := filepath.Join(c.outputPath, fmt.Sprintf("%s-%s%s", | ||||||
| 		strings.ToUpper(nameSlice[0]), | 		strings.ToUpper(letters), | ||||||
| 		nameSlice[1], | 		num, | ||||||
| 		suffix, | 		suffix, | ||||||
| 	)) | 	)) | ||||||
| 
 | 
 | ||||||
| 	req, err := http.NewRequest("GET", imgUrl, nil) | 	// 使用带超时的上下文
 | ||||||
|  | 	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) | ||||||
|  | 	defer cancel() | ||||||
|  | 
 | ||||||
|  | 	req, err := http.NewRequestWithContext(ctx, "GET", imgUrl, nil) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return fmt.Errorf("创建请求失败: %w", err) | 		return fmt.Errorf("创建请求失败: %w", err) | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	// 模拟浏览器请求
 | 	// 模拟浏览器请求
 | ||||||
| 	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36") | 	userAgents := []string{ | ||||||
|  | 		"Mozilla/5.0 (Windows NT 10.0; Win64; x64)", | ||||||
|  | 		"AppleWebKit/537.36 (KHTML, like Gecko)", | ||||||
|  | 		"Chrome/120.0.0.0 Safari/537.36", | ||||||
|  | 	} | ||||||
|  | 	req.Header.Set("User-Agent", userAgents[time.Now().UnixNano()%int64(len(userAgents))]) | ||||||
| 
 | 
 | ||||||
| 	resp, err := http.DefaultClient.Do(req) | 	resp, err := c.client.Do(req) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return fmt.Errorf("请求失败: %w", err) | 		return fmt.Errorf("请求失败: %w", err) | ||||||
| 	} | 	} | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user