package main import ( "bytes" "errors" "fmt" "github.com/spf13/viper" "golang.org/x/sync/errgroup" "image" _ "image/jpeg" "io" "net/http" "os" "path/filepath" "strconv" "strings" "unicode" ) type Crawler struct { avPath string outputPath string config *viper.Viper } func NewCrawler(avPath, outputPath string) *Crawler { config := viper.New() config.SetConfigName("config") config.AddConfigPath(".") config.SetConfigType("yaml") if err := config.ReadInConfig(); err != nil { fmt.Printf("读取配置文件发生错误, %s", err) } return &Crawler{ avPath: avPath, outputPath: outputPath, config: config, } } // 检查文件是否是视频文件 func (c *Crawler) isVideoFile(fileName string) bool { videoExtensions := c.config.GetStringSlice("crawler.video") ext := strings.ToLower(filepath.Ext(fileName)) for _, videoExt := range videoExtensions { if ext == videoExt { return true } } return false } // 获取文件信息,包括大小和修改时间 func (c *Crawler) getFileInfo(filePath string) (int64, string, error) { info, err := os.Stat(filePath) if err != nil { return 0, "", err } return info.Size(), info.ModTime().String(), nil } // 获取代码数字 func (c *Crawler) getCodeNum(s string) int { runes := []rune(s) if len(runes) < 3 { return 0 } for i := 0; i < 3; i++ { if !unicode.IsDigit(runes[i]) { return 0 } } num, _ := strconv.Atoi(string(runes[:3])) return num } // 获取封面代码列表 func (c *Crawler) getCoverCodeList(files []string) (coverList []string, err error) { for _, file := range files { // 去除域名部分 if strings.IndexRune(file, '@') > 0 { file = strings.Split(file, "@")[1] } nameSlice := strings.Split(file, "-") if len(nameSlice) < 2 || len(nameSlice[0]) > 5 { continue } num := c.getCodeNum(nameSlice[1]) if num == 0 { continue } format := "%s%05d" if len(nameSlice[0]) > 4 { format = "1%s%05d" } coverList = append(coverList, fmt.Sprintf(format, strings.ToLower(nameSlice[0]), num)) } return coverList, nil } // 获取封面图片 func (c *Crawler) fetchCoverImg(code string) error { imgUrl := strings.ReplaceAll(c.config.GetString("crawler.url"), `*`, code) suffix := filepath.Ext(imgUrl) nameSlice := strings.Split(code, "00") fileName := strings.Join([]string{ c.outputPath, string(os.PathSeparator), strings.ToUpper(nameSlice[0]), "-", nameSlice[1], suffix, }, "") req, err := http.NewRequest("GET", imgUrl, nil) if err != nil { return err } // 模拟浏览器请求 req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36") resp, err := http.DefaultClient.Do(req) if err != nil { return err } defer resp.Body.Close() // 检查 HTTP 状态码 if resp.StatusCode != http.StatusOK { return nil } // 读取整个图片数据到内存 imgData, err := io.ReadAll(resp.Body) if err != nil { return fmt.Errorf("无法读取图片数据: %v", err) } imgReader := bytes.NewReader(imgData) img, _, err := image.DecodeConfig(imgReader) if err != nil { return err } // 图片高度未到达配置最低值则抛弃 if img.Height < c.config.GetInt("crawler.minHeight") { return nil } // 重新创建 Reader 以供文件保存 if _, err := imgReader.Seek(0, io.SeekStart); err != nil { return err } file, err := os.Create(fileName) if err != nil { return err } defer file.Close() _, err = io.Copy(file, imgReader) if err != nil { return err } return nil } func (c *Crawler) Handle() error { if c.avPath == "未选择" || c.outputPath == "未选择" { return errors.New("请选择作品存放目录或输出目录") } // 用于去重的集合 uniqueFiles := make(map[string]struct{}) var videoFiles []string if err := filepath.Walk(c.avPath, func(path string, info os.FileInfo, err error) error { if err != nil { return err } // 仅处理视频文件 if c.isVideoFile(info.Name()) { // 获取文件大小和修改时间 fileSize, modTime, err := c.getFileInfo(path) if err != nil { return err } // 根据文件的大小和修改时间生成唯一的文件标识 uniqueID := fmt.Sprintf("%d-%s", fileSize, modTime) if _, exists := uniqueFiles[uniqueID]; !exists { uniqueFiles[uniqueID] = struct{}{} fileName := info.Name() extIndex := strings.LastIndex(info.Name(), ".") if extIndex != -1 { fileName = fileName[:extIndex] // 去除扩展名 } videoFiles = append(videoFiles, fileName) } } return nil }); err != nil { return err } coverList, err := c.getCoverCodeList(videoFiles) if err != nil { return err } var g errgroup.Group for _, cover := range coverList { g.Go(func() error { if err := c.fetchCoverImg(cover); err != nil { return err } return nil }) } return g.Wait() }