cover-crawler/crawler.go
2025-03-02 21:12:10 +08:00

235 lines
4.8 KiB
Go

package main
import (
"bytes"
"errors"
"fmt"
"github.com/spf13/viper"
"golang.org/x/sync/errgroup"
"image"
_ "image/jpeg"
"io"
"net/http"
"os"
"path/filepath"
"strconv"
"strings"
"unicode"
)
// Crawler collects cover images for the video files found under a directory.
type Crawler struct {
	// avPath is the directory that is scanned for video files;
	// outputPath is the directory the downloaded covers are written to.
	avPath, outputPath string

	// config holds the settings read by NewCrawler (the "crawler.*" keys).
	config *viper.Viper

	// g runs the concurrent cover downloads started by Handle.
	g errgroup.Group
}
// NewCrawler returns a Crawler that scans avPath for videos and writes covers
// to outputPath.
//
// Settings are read from a YAML config named "config" looked up in the current
// directory. A read failure is only printed; the Crawler is still returned
// with whatever the config object holds at that point.
func NewCrawler(avPath, outputPath string) *Crawler {
	cfg := viper.New()
	cfg.SetConfigType("yaml")
	cfg.SetConfigName("config")
	cfg.AddConfigPath(".")

	err := cfg.ReadInConfig()
	if err != nil {
		fmt.Printf("读取配置文件发生错误, %s", err)
	}

	crawler := new(Crawler)
	crawler.avPath = avPath
	crawler.outputPath = outputPath
	crawler.config = cfg
	return crawler
}
// isVideoFile reports whether fileName has one of the extensions listed under
// the "crawler.video" config key.
//
// The comparison ignores case and the leading dot, so the config entries
// ".mp4", ".MP4" and "mp4" all match "movie.mp4". A name without an extension
// is never a video file.
func (c *Crawler) isVideoFile(fileName string) bool {
	ext := strings.TrimPrefix(filepath.Ext(fileName), ".")
	if ext == "" {
		return false
	}
	for _, videoExt := range c.config.GetStringSlice("crawler.video") {
		if strings.EqualFold(ext, strings.TrimPrefix(videoExt, ".")) {
			return true
		}
	}
	return false
}
// getFileInfo stats filePath and returns its size together with its
// modification time rendered as a string. On failure it returns the stat
// error with a zero size and an empty string.
func (c *Crawler) getFileInfo(filePath string) (int64, string, error) {
	stat, statErr := os.Stat(filePath)
	if statErr != nil {
		return 0, "", statErr
	}

	size := stat.Size()
	modified := stat.ModTime().String()
	return size, modified, nil
}
// getCodeNum parses the first three characters of s as a number.
//
// It returns 0 when s has fewer than three characters or when any of the
// first three is not a digit; anything after the third character is ignored.
func (c *Crawler) getCodeNum(s string) int {
	const width = 3

	head := make([]rune, 0, width)
	for _, r := range s {
		if !unicode.IsDigit(r) {
			return 0
		}
		head = append(head, r)
		if len(head) == width {
			break
		}
	}
	if len(head) < width {
		return 0
	}

	// The error is deliberately dropped: Atoi yields 0 when it cannot parse
	// the digits, which is the same "no number" result as above.
	value, _ := strconv.Atoi(string(head))
	return value
}
// getCoverCodeList converts video file names (without extension) into cover
// codes, in input order and without duplicates.
//
// For each name:
//   - if it contains '@', only the segment after the first '@' is used
//     (this drops a leading site/domain part, including an empty one);
//   - the name is split on '-'; the first part is the prefix and the second
//     must start with three digits (see getCodeNum);
//   - names with no '-', an empty prefix, a prefix longer than 5 bytes, or a
//     number of 0 are skipped.
//
// The code is the lower-cased prefix followed by the number zero-padded to
// five digits; 5-byte prefixes additionally get a leading "1".
//
// Several files can map to the same code (e.g. "ABC-123-A" and "ABC-123-B");
// each code is returned once so the same cover is not fetched and written
// twice. The returned error is always nil.
func (c *Crawler) getCoverCodeList(files []string) (coverList []string, err error) {
	seen := make(map[string]struct{}, len(files))
	for _, file := range files {
		if strings.Contains(file, "@") {
			file = strings.Split(file, "@")[1]
		}

		nameSlice := strings.Split(file, "-")
		if len(nameSlice) < 2 {
			continue
		}
		prefix := nameSlice[0]
		if prefix == "" || len(prefix) > 5 {
			continue
		}

		num := c.getCodeNum(nameSlice[1])
		if num == 0 {
			continue
		}

		format := "%s%05d"
		if len(prefix) > 4 {
			format = "1%s%05d"
		}
		code := fmt.Sprintf(format, strings.ToLower(prefix), num)

		if _, dup := seen[code]; dup {
			continue
		}
		seen[code] = struct{}{}
		coverList = append(coverList, code)
	}
	return coverList, nil
}
// fetchCoverImg downloads the cover image for code and saves it under
// c.outputPath.
//
// The download URL is the "crawler.url" config value with every "*" replaced
// by code. The file is named "<PREFIX>-<number><ext>": PREFIX is the
// upper-cased part of code before its first "00", number is everything after
// it, and ext is the extension of the download URL.
//
// A response other than 200 OK, or an image whose height is below the
// "crawler.minHeight" config value, is skipped and nil is returned. An error
// is returned when code contains no "00", or when the request, reading the
// body, decoding the image header, or writing the file fails.
func (c *Crawler) fetchCoverImg(code string) error {
	// Split at the first "00" only. Splitting at every occurrence mangles
	// numbers that contain zeros themselves ("abc00005", "abc00100").
	parts := strings.SplitN(code, "00", 2)
	if len(parts) != 2 {
		return fmt.Errorf("无效的封面代码: %q", code)
	}

	imgURL := strings.ReplaceAll(c.config.GetString("crawler.url"), `*`, code)
	fileName := filepath.Join(
		c.outputPath,
		strings.ToUpper(parts[0])+"-"+parts[1]+filepath.Ext(imgURL),
	)

	req, err := http.NewRequest(http.MethodGet, imgURL, nil)
	if err != nil {
		return err
	}
	// Present a browser User-Agent with the request.
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	// A missing cover is not an error; there is simply nothing to save.
	if resp.StatusCode != http.StatusOK {
		return nil
	}

	// The whole image is buffered so it can be inspected first and written
	// afterwards from the same bytes.
	imgData, err := io.ReadAll(resp.Body)
	if err != nil {
		return fmt.Errorf("无法读取图片数据: %w", err)
	}

	imgConfig, _, err := image.DecodeConfig(bytes.NewReader(imgData))
	if err != nil {
		return err
	}
	// Discard images that are lower than the configured minimum height.
	if imgConfig.Height < c.config.GetInt("crawler.minHeight") {
		return nil
	}

	// WriteFile creates/truncates the file and reports write and close errors.
	return os.WriteFile(fileName, imgData, 0666)
}
// Handle scans c.avPath for video files, derives a cover code from each file
// name and downloads the covers concurrently into c.outputPath.
//
// It returns an error when either path still holds the "未选择" placeholder,
// when walking the directory or stat-ing a video file fails, or when any
// download fails (the first such error, after all downloads have finished).
//
// NOTE(review): the downloads run on c.g, which lives on the Crawler. An
// errgroup.Group keeps the first error it saw, so calling Handle again on a
// Crawler whose earlier run failed presumably returns that old error —
// confirm whether a Crawler is meant to be reused.
func (c *Crawler) Handle() error {
	if c.avPath == "未选择" || c.outputPath == "未选择" {
		return errors.New("请选择作品存放目录或输出目录")
	}

	// Files with the same size and modification time are treated as copies of
	// one video; only the first one encountered is kept.
	seen := make(map[string]struct{})
	var videoFiles []string

	walkErr := filepath.Walk(c.avPath, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		// A directory may carry a video-like extension; only files count.
		if info.IsDir() || !c.isVideoFile(info.Name()) {
			return nil
		}

		fileSize, modTime, err := c.getFileInfo(path)
		if err != nil {
			return err
		}
		uniqueID := fmt.Sprintf("%d-%s", fileSize, modTime)
		if _, dup := seen[uniqueID]; dup {
			return nil
		}
		seen[uniqueID] = struct{}{}

		// Keep the base name without its extension.
		name := info.Name()
		videoFiles = append(videoFiles, strings.TrimSuffix(name, filepath.Ext(name)))
		return nil
	})
	if walkErr != nil {
		return walkErr
	}

	coverList, err := c.getCoverCodeList(videoFiles)
	if err != nil {
		return err
	}

	for _, cover := range coverList {
		// Per-iteration copy: before Go 1.22 the loop variable is shared, and
		// every goroutine would otherwise fetch the last code.
		cover := cover
		c.g.Go(func() error {
			return c.fetchCoverImg(cover)
		})
	}
	return c.g.Wait()
}