package main import ( "bytes" "errors" "fmt" "github.com/spf13/viper" "golang.org/x/sync/errgroup" "image" _ "image/jpeg" "io" "net/http" "os" "path/filepath" "strconv" "strings" "unicode" ) type Crawler struct { avPath string outputPath string config *viper.Viper } func NewCrawler(avPath, outputPath string) *Crawler { config := viper.New() config.SetConfigName("config") config.AddConfigPath(".") config.SetConfigType("yaml") if err := config.ReadInConfig(); err != nil { fmt.Printf("读取配置文件发生错误, %s", err) } return &Crawler{ avPath: avPath, outputPath: outputPath, config: config, } } // 检查文件是否是视频文件 func (c *Crawler) isVideoFile(fileName string) bool { videoExtensions := c.config.GetStringSlice("crawler.video") ext := strings.ToLower(filepath.Ext(fileName)) for _, videoExt := range videoExtensions { if ext == videoExt { return true } } return false } // 获取文件信息,包括大小和修改时间 func (c *Crawler) getFileInfo(filePath string) (int64, string, error) { info, err := os.Stat(filePath) if err != nil { return 0, "", err } return info.Size(), info.ModTime().String(), nil } // 获取代码数字 func (c *Crawler) getCodeNum(s string) int { runes := []rune(s) if len(runes) < 3 { return 0 } for i := 0; i < 3; i++ { if !unicode.IsDigit(runes[i]) { return 0 } } num, _ := strconv.Atoi(string(runes[:3])) return num } // 获取封面代码列表 func (c *Crawler) getCoverCodeList(files []string) (coverList []string) { for _, file := range files { // 去除域名部分 if strings.IndexRune(file, '@') > 0 { file = strings.Split(file, "@")[1] } nameSlice := strings.Split(file, "-") if len(nameSlice) < 2 || len(nameSlice[0]) > 5 { continue } num := c.getCodeNum(nameSlice[1]) if num == 0 { continue } format := "%s%05d" if len(nameSlice[0]) > 4 { format = "1%s%05d" } coverList = append(coverList, fmt.Sprintf(format, strings.ToLower(nameSlice[0]), num)) } return coverList } // 获取封面图片 func (c *Crawler) fetchCoverImg(code string) error { imgUrl := strings.ReplaceAll(c.config.GetString("crawler.url"), `*`, code) suffix := filepath.Ext(imgUrl) nameSlice := strings.Split(code, "00") if len(nameSlice) < 2 { return nil } if len(nameSlice[0]) > 4 { nameSlice[0] = nameSlice[0][1:] } fileName := filepath.Join(c.outputPath, fmt.Sprintf("%s-%s%s", strings.ToUpper(nameSlice[0]), nameSlice[1], suffix, )) req, err := http.NewRequest("GET", imgUrl, nil) if err != nil { return fmt.Errorf("创建请求失败: %w", err) } // 模拟浏览器请求 req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36") resp, err := http.DefaultClient.Do(req) if err != nil { return fmt.Errorf("请求失败: %w", err) } defer resp.Body.Close() // 检查 HTTP 状态码 if resp.StatusCode != http.StatusOK { return nil } // 先完整读取 HTTP Body imgData, err := io.ReadAll(resp.Body) if err != nil { return fmt.Errorf("读取图片数据失败: %w", err) } // 使用内存数据解码图片 img, _, err := image.DecodeConfig(bytes.NewReader(imgData)) if err != nil { return fmt.Errorf("图片解码失败: %w", err) } // 图片高度未到达配置最低值则抛弃 if img.Height < c.config.GetInt("crawler.minHeight") { return nil } // 将内存数据写入文件 if err := os.WriteFile(fileName, imgData, 0644); err != nil { return fmt.Errorf("文件写入失败: %w", err) } return nil } func (c *Crawler) Handle() error { if c.avPath == "未选择" || c.outputPath == "未选择" { return errors.New("请选择作品存放目录或输出目录") } // 用于去重的集合 uniqueFiles := make(map[string]struct{}) var videoFiles []string if err := filepath.Walk(c.avPath, func(path string, info os.FileInfo, err error) error { if err != nil { return fmt.Errorf("访问路径 %s 失败: %w", path, err) } // 仅处理视频文件 if !c.isVideoFile(info.Name()) { return nil } baseName := strings.TrimSuffix(info.Name(), filepath.Ext(info.Name())) // 获取文件大小和修改时间 fileSize, modTime, err := c.getFileInfo(path) if err != nil { return fmt.Errorf("获取文件信息失败 %s: %w", baseName, err) } // 根据文件的大小和修改时间生成唯一的文件标识 uniqueID := fmt.Sprintf("%d-%s", fileSize, modTime) if _, exists := uniqueFiles[uniqueID]; !exists { uniqueFiles[uniqueID] = struct{}{} videoFiles = append(videoFiles, baseName) } return nil }); err != nil { return fmt.Errorf("目录遍历失败: %w", err) } coverList := c.getCoverCodeList(videoFiles) var g errgroup.Group for _, cover := range coverList { g.Go(func() error { if err := c.fetchCoverImg(cover); err != nil { return err } return nil }) } return g.Wait() }