-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
263 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| package idl | ||
|
|
||
| type BilibiliPicture struct { | ||
| ID uint64 `gorm:"primarykey"` | ||
| Url string `gorm:"column:url"` | ||
| DynamicID uint64 `gorm:"column:dynamic_id"` | ||
| TopicName string `gorm:"column:topic_name"` | ||
| SentAt uint64 `gorm:"column:sent_at"` | ||
| CreatedAt uint64 `gorm:"autoCreateTime"` | ||
| UpdatedAt uint64 `gorm:"autoUpdateTime"` | ||
| } | ||
|
|
||
| type BilibiliPictureDTO struct { | ||
| ID uint64 `json:"id"` | ||
| Url string `json:"url"` | ||
| CreatedAt uint64 `json:"created_at"` | ||
| } | ||
|
|
||
| func (BilibiliPicture) TableName() string { | ||
| return "bilibili_pictures" | ||
| } | ||
|
|
||
| type BilibiliPictureRepository interface { | ||
| Create(items []*BilibiliPicture) error | ||
| FindMaxDynamicID(topicName string) (*uint64, error) | ||
| } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,167 @@ | ||
| package spider | ||
|
|
||
| import ( | ||
| "context" | ||
| "encoding/json" | ||
| "fmt" | ||
| "strconv" | ||
| "time" | ||
|
|
||
| "git.vtb.link/eoefans/internal/app/api/idl" | ||
| "git.vtb.link/eoefans/internal/pkg/bilibili" | ||
| "git.vtb.link/eoefans/internal/repository" | ||
| "github.com/pkg/errors" | ||
| "go.uber.org/zap" | ||
| "gorm.io/gorm" | ||
| ) | ||
|
|
||
| type Picture struct { | ||
| stopChan chan bool | ||
| db *gorm.DB | ||
| logger *zap.Logger | ||
| sdk *bilibili.SDK | ||
| isRunning bool | ||
| } | ||
|
|
||
| func NewPicture(db *gorm.DB, logger *zap.Logger, sdk *bilibili.SDK) *Picture { | ||
| return &Picture{ | ||
| stopChan: make(chan bool), | ||
| db: db, | ||
| logger: logger, | ||
| sdk: sdk, | ||
| } | ||
| } | ||
|
|
||
| func (v *Picture) Stop(ctx context.Context) error { | ||
| v.logger.Info("stopping spider server") | ||
|
|
||
| for { | ||
| select { | ||
| case <-ctx.Done(): | ||
| return errors.New("shutdown spider server timeout") | ||
| default: | ||
| if err := v.stop(); err != nil { | ||
| return errors.Wrap(err, "shutdown spider server error") | ||
| } | ||
| return nil | ||
| } | ||
| } | ||
| } | ||
|
|
||
| func (v *Picture) stop() error { | ||
| v.stopChan <- true | ||
| v.isRunning = false | ||
| return nil | ||
| } | ||
|
|
||
| func (v *Picture) Run(ctx context.Context) error { | ||
| tk := time.NewTicker(60 * time.Minute) | ||
| v.isRunning = true | ||
|
|
||
| go func() { | ||
| if err := v.spider(); err != nil { | ||
| v.logger.Error("start spider server error", zap.Error(err)) | ||
| } | ||
| }() | ||
|
|
||
| go func(_tk *time.Ticker) { | ||
| for { | ||
| select { | ||
| case <-_tk.C: | ||
| v.logger.Info("[tick] picture spider", zap.Time("time", time.Now())) | ||
| if err := v.spider(); err != nil { | ||
| v.logger.Error("start picture server error", zap.Error(err)) | ||
| } | ||
| case <-v.stopChan: | ||
| return | ||
| } | ||
| } | ||
| }(tk) | ||
|
|
||
| return nil | ||
| } | ||
|
|
||
| func (p *Picture) spider() error { | ||
| //把当前数据库最大的动态ID查出来 | ||
| //调用接口,将大于当前动态ID的都入DB,如果存在小于的,则可提前结束,尽量保证没有重复数据 | ||
| topics := []string{ | ||
| bilibili.TopicNameGoGo, | ||
| bilibili.TopicNameMino, | ||
| bilibili.TopicNameUn, | ||
| bilibili.TopicNameMoMo, | ||
| bilibili.TopicNameWan, | ||
| bilibili.TopicNameEOE, | ||
| } | ||
| for _, topic := range topics { | ||
| curMaxDynamicID, err := repository.NewBilibiliPicture(p.db).FindMaxDynamicID(topic) | ||
| if err != nil { | ||
| p.logger.Error("FindMaxDynamicID error", zap.String("topic_name", topic), zap.Error(err)) | ||
| continue | ||
| } | ||
| var hasMore uint = 1 | ||
| var offset uint64 = 0 | ||
| exist := false //判断有没有已经爬过 | ||
| for hasMore == 1 && !exist { | ||
| data, err := p.sdk.TopicDynamic(topic, offset) | ||
| if err != nil { | ||
| p.logger.Error("TopicDynamic error", zap.String("topic_name", topic), zap.String("offset", fmt.Sprintf("%d", offset)), zap.Error(err)) | ||
| time.Sleep(500 * time.Millisecond) | ||
| break | ||
| } | ||
| hasMore = data.HasMore | ||
| dynamicID, err := strconv.ParseUint(data.Offset, 10, 64) | ||
| if err == nil { | ||
| offset = dynamicID | ||
| } else { | ||
| hasMore = 0 | ||
| } | ||
| items := make([]*idl.BilibiliPicture, 0) | ||
| for _, v := range data.Cards { | ||
| switch v.Desc.Type { | ||
| case bilibili.DynamicDraw: | ||
| dynamicID, _ := strconv.ParseUint(v.Desc.DynamicId, 10, 64) | ||
| if dynamicID <= *curMaxDynamicID { | ||
| //后面所有的都是爬过的,提前结束,后续也不再请求api | ||
| exist = true | ||
| break | ||
| } | ||
| pictures, err := parsePicturesFromCard(v.Card) | ||
| if err != nil { | ||
| p.logger.Error("ParsePicturesFromCard error", zap.String("topic_name", topic), zap.String("offset", fmt.Sprintf("%d", offset)), zap.Error(err)) | ||
| continue | ||
| } | ||
| for _, url := range pictures { | ||
| items = append(items, &idl.BilibiliPicture{ | ||
| Url: url, | ||
| DynamicID: dynamicID, | ||
| TopicName: topic, | ||
| SentAt: v.Desc.TimeStamp, | ||
| }) | ||
| } | ||
| default: | ||
| continue | ||
| } | ||
| } | ||
| //插入数据 | ||
| if len(items) != 0 { | ||
| err := repository.NewBilibiliPicture(p.db).Create(items) | ||
| if err != nil { | ||
| p.logger.Error("Create bilibli_pictures error", zap.String("topic_name", topic), zap.String("offset", fmt.Sprintf("%d", offset)), zap.Error(err)) | ||
| } | ||
| } | ||
| } | ||
| } | ||
| return nil | ||
| } | ||
|
|
||
| func parsePicturesFromCard(data string) ([]string, error) { | ||
| var content bilibili.DynamicCardContent | ||
| if err := json.Unmarshal([]byte(data), &content); err != nil { | ||
| return nil, err | ||
| } | ||
| pics := make([]string, 0, len(content.Item.Pictures)) | ||
| for _, v := range content.Item.Pictures { | ||
| pics = append(pics, v.ImgSrc) | ||
| } | ||
| return pics, nil | ||
| } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,60 @@ | ||
| package repository | ||
|
|
||
| import ( | ||
| "git.vtb.link/eoefans/internal/app/api/idl" | ||
| "gorm.io/gorm" | ||
| ) | ||
|
|
||
| func NewBilibiliPicture(tx *gorm.DB) idl.BilibiliPictureRepository { | ||
| return &BilibiliPictureMysqlImpl{tx: tx} | ||
| } | ||
|
|
||
| type BilibiliPictureMysqlImpl struct { | ||
| tx *gorm.DB | ||
| } | ||
|
|
||
| func (impl *BilibiliPictureMysqlImpl) Create(items []*idl.BilibiliPicture) error { | ||
| if len(items) == 0 { | ||
| return nil | ||
| } | ||
| //针对url去重 | ||
| return impl.tx.Transaction(func(_tx *gorm.DB) error { | ||
| urls := make([]string, 0, len(items)) | ||
| for _, v := range items { | ||
| urls = append(urls, v.Url) | ||
| } | ||
| var exist []*idl.BilibiliPicture | ||
| err := _tx.Table(idl.BilibiliPicture{}.TableName()).Where("url in (?)", urls).Distinct("url").Find(&exist).Error | ||
| if err != nil { | ||
| return err | ||
| } | ||
| filter := make([]*idl.BilibiliPicture, 0) | ||
| for i := range items { | ||
| find := false | ||
| for j := range exist { | ||
| if items[i].Url == exist[j].Url { | ||
| find = true | ||
| break | ||
| } | ||
| } | ||
| if !find { | ||
| filter = append(filter, items[i]) | ||
| } | ||
| } | ||
| err = _tx.Table(idl.BilibiliPicture{}.TableName()).Create(&filter).Error | ||
| if err != nil { | ||
| return err | ||
| } | ||
| return nil | ||
| }) | ||
| } | ||
|
|
||
| func (impl *BilibiliPictureMysqlImpl) FindMaxDynamicID(topicName string) (*uint64, error) { | ||
| var id uint64 | ||
| conn := impl.tx.Table(idl.BilibiliPicture{}.TableName()) | ||
| err := conn.Select("max(dynamic_id) as id").Where("topic_name = ?", topicName).Group("dynamic_id").Scan(&id).Error | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
| return &id, nil | ||
| } |