From 8c3314aacb686a3bdbbc7b6c242d0ac93683d11d Mon Sep 17 00:00:00 2001 From: fantasticbin Date: Fri, 7 Mar 2025 19:33:12 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E8=BF=87=E6=BB=A4=E5=B7=B2?= =?UTF-8?q?=E6=8B=89=E5=8F=96=E5=B0=81=E9=9D=A2=E7=9A=84=E9=80=BB=E8=BE=91?= =?UTF-8?q?=EF=BC=8C=E5=8F=8A=E5=8E=BB=E9=99=A4=E8=AF=B7=E6=B1=82ctx?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crawler.go | 105 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 84 insertions(+), 21 deletions(-) diff --git a/crawler.go b/crawler.go index 680d42c..dfb405a 100644 --- a/crawler.go +++ b/crawler.go @@ -2,7 +2,6 @@ package main import ( "bytes" - "context" "errors" "fmt" "github.com/spf13/viper" @@ -69,6 +68,12 @@ func (c *Crawler) isVideoFile(fileName string) bool { return false } +// 检查文件是否是 JPG 文件 +func (c *Crawler) isJPGFile(fileName string) bool { + ext := strings.ToLower(filepath.Ext(fileName)) + return ext == ".jpg" +} + // 获取文件 CRC32 哈希值 func (c *Crawler) getFileInfo(filePath string) (uint32, error) { file, err := os.Open(filePath) @@ -157,7 +162,6 @@ func (c *Crawler) fetchCoverImg(code coverCode) error { imgUrl := c.getCoverImgUrl(code) suffix := filepath.Ext(imgUrl) - fileName := fmt.Sprintf("%s-%03d%s", strings.ToUpper(code.letters), code.number, @@ -165,11 +169,7 @@ func (c *Crawler) fetchCoverImg(code coverCode) error { ) filePath := filepath.Join(c.outputPath, fileName) - // 使用带超时的上下文 - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - - req, err := http.NewRequestWithContext(ctx, "GET", imgUrl, nil) + req, err := http.NewRequest(http.MethodGet, imgUrl, nil) if err != nil { return fmt.Errorf("创建请求失败: %w", err) } @@ -218,15 +218,10 @@ func (c *Crawler) fetchCoverImg(code coverCode) error { return nil } -func (c *Crawler) Handle() error { - if c.avPath == "未选择" || c.outputPath == "未选择" { - return errors.New("请选择作品存放目录或输出目录") - } - - // 用于去重的集合 +// 
获取作品存放目录视频文件列表 +func (c *Crawler) getAVPathVideoList() (videoFiles []string, err error) { + // 用于去重 uniqueFiles := make(map[string]struct{}) - var videoFiles []string - if err := filepath.Walk(c.avPath, func(path string, info os.FileInfo, err error) error { if err != nil { return fmt.Errorf("访问路径 %s 失败: %w", path, err) @@ -258,19 +253,87 @@ func (c *Crawler) Handle() error { } return nil }); err != nil { - return fmt.Errorf("目录遍历失败: %w", err) + return nil, fmt.Errorf("作品存放目录遍历失败: %w", err) + } + + return videoFiles, nil +} + +// 获取输出目录已存在的封面列表 +func (c *Crawler) getOutPathCoverList() (coverList []coverCode, err error) { + // 用于去重 + uniqueFiles := make(map[string]struct{}) + if err := filepath.Walk(c.outputPath, func(path string, info os.FileInfo, err error) error { + if err != nil { + return fmt.Errorf("访问路径 %s 失败: %w", path, err) + } + + // 目录过滤 + if info.IsDir() { + return nil + } + + // 仅处理图片文件 + if !c.isJPGFile(info.Name()) { + return nil + } + + baseName := strings.TrimSuffix(info.Name(), filepath.Ext(info.Name())) + if _, exists := uniqueFiles[baseName]; !exists { + uniqueFiles[baseName] = struct{}{} + nameSlice := strings.Split(baseName, "-") + coverList = append(coverList, coverCode{ + letters: strings.ToLower(nameSlice[0]), + number: c.getCodeNum(nameSlice[1]), + }) + } + return nil + }); err != nil { + return nil, fmt.Errorf("输出目录遍历失败: %w", err) + } + + return coverList, nil +} + +func (c *Crawler) Handle() error { + if c.avPath == "未选择" || c.outputPath == "未选择" { + return errors.New("请选择作品存放目录或输出目录") + } + + videoFiles, err := c.getAVPathVideoList() + if err != nil { + return err } coverList := c.getCoverCodeList(videoFiles) + existCovers, err := c.getOutPathCoverList() + if err != nil { + return err + } + + // 过滤已存在的封面 + if len(existCovers) > 0 { + // 创建哈希表用于快速查找 + existMap := make(map[coverCode]struct{}) + for _, c := range existCovers { + existMap[c] = struct{}{} + } + + // 创建新切片过滤已存在项 + filtered := make([]coverCode, 0, len(coverList)) + for _, item := 
range coverList { + if _, exists := existMap[item]; !exists { + filtered = append(filtered, item) + } + } + + coverList = filtered + } var g errgroup.Group for _, cover := range coverList { g.Go(func() error { - if err := c.fetchCoverImg(cover); err != nil { - return err - } - - return nil + return c.fetchCoverImg(cover) }) }