From 2b710b40e2c04bd7c63791cc601c3068d54fc040 Mon Sep 17 00:00:00 2001 From: bianjiajie Date: Sun, 15 Jan 2023 15:45:05 +0800 Subject: [PATCH 1/6] =?UTF-8?q?[feat]=20=E5=A2=9E=E5=8A=A0=E8=AF=9D?= =?UTF-8?q?=E9=A2=98=E5=8A=A8=E6=80=81=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/pkg/bilibili/video.go | 58 ++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/internal/pkg/bilibili/video.go b/internal/pkg/bilibili/video.go index 576a762..837189c 100644 --- a/internal/pkg/bilibili/video.go +++ b/internal/pkg/bilibili/video.go @@ -1,8 +1,10 @@ package bilibili import ( + "encoding/json" "fmt" "net/http" + "net/url" "git.vtb.link/eoefans/internal/pkg/httpclient" "github.com/go-resty/resty/v2" @@ -14,12 +16,29 @@ const ( webVideoSearchURL = "https://api.bilibili.com/x/web-interface/search/type?context=&search_type=video&page=%d&order=pubdate&keyword=%s&duration=0&category_id=&tids_2=&__refresh__=true&_extra=&tids=0&highlight=1&single_column=0" webVideoInfoURL = "https://api.bilibili.com/x/web-interface/view?bvid=%s" webVideoTagInfoURL = "https://api.bilibili.com/x/web-interface/view/detail/tag?aid=%s" + topicHistory = "https://api.vc.bilibili.com/topic_svr/v1/topic_svr/topic_history?offset_dynamic_id=%d&" ) const ( cookie = `buvid3=84D177FC-9F90-100D-8BE2-09EE5D9912C823748infoc; b_nut=1672450523; i-wanna-go-back=-1; b_lsid=10D3F3189_18565D1C03E; _uuid=1F95104D7-B7410-FFAD-4263-1EF4BFB14B61023203infoc; buvid_fp=beb12c3870e59666cfee50c6431e8b1a; buvid4=2F7A4DEB-FE17-1579-67FF-E0004E5BFE6D24471-022123109-ytYURKXzrIAOEIOhWNJfdQ%3D%3D; SESSDATA=239cfdaf%2C1688002658%2Ca176f%2Ac2; bili_jct=f4be98b5fb468b646b7260f10ba547e1; DedeUserID=554518161; DedeUserID__ckMd5=d1653a8f679dcf53; CURRENT_FNVAL=4048; sid=870jixri; is-2022-channel=1; hit-new-style-dyn=0; hit-dyn-v2=1; b_ut=5; innersign=1; rpdid=|(u~|||~Y)m)0J\'uY~kRl|~uu'` ) +type DynamicType uint + +const ( + DynamicDraw 
DynamicType = 2 //图片动态 +) + +const ( + //topicHistory用topic_id查出来的数据有问题,故暂时用topic_name + topNameWan = "小莞熊在这里" + topNameUn = "柚恩的蜜罐子" + topNameGoGo = "GOGO队立大功!" //中文感叹号 + topNameMoMo = "虞你在一起" + topNameMino = "和米诺的对抗路日常" + topNameEOE = "EOE的魔法盒" +) + type SDK struct { logger *zap.Logger } @@ -35,6 +54,33 @@ type ResponseBasic struct { Data interface{} `json:"data"` } +type DynamicInfo struct { + Cards []DynamicCard `json:"cards"` + HasMore uint `json:"has_more"` + Offset string `json:"offset"` +} + +type DynamicCard struct { + Desc struct { + Type DynamicType `json:"type"` + } `json:"desc"` + Card json.RawMessage `json:"card"` +} + +// Card是json字符串,需要进一步解析 +type DynamicCardContent struct { + Item struct { + Pictures DynamicPictures `json:"pictures"` + } `json:"item"` +} + +type DynamicPictures []DynamicPicture +type DynamicPicture struct { + Height float64 `json:"img_height"` + Size float64 `json:"img_size"` + Width float64 `json:"img_width"` + ImgSrc string `json:"image_src"` +} type VideoSearchInfo struct { Type string `json:"type"` Id int `json:"id"` @@ -290,3 +336,15 @@ func (sdk *SDK) VideoWebTagInfo(aid string) (data *VideoTagResponse, err error) } return data, nil } + +func (sdk *SDK) TopicDynamic(topicName string, offsetDynamicId uint64) (data *DynamicInfo, err error) { + params := url.Values{} + params.Add("topic_name", topicName) + url := fmt.Sprintf(topicHistory, offsetDynamicId) + url = url + params.Encode() + fmt.Println(url) + if err = sdk.fastGet(url, &data); err != nil { + return nil, err + } + return data, nil +} From a2c8f2f6fd04e46c562f32118672da48048ca72d Mon Sep 17 00:00:00 2001 From: bianjiajie Date: Sun, 15 Jan 2023 21:24:51 +0800 Subject: [PATCH 2/6] =?UTF-8?q?[feat]=20=E7=88=AC=E5=8F=96=E5=9B=BE?= =?UTF-8?q?=E6=96=87=E5=8A=A8=E6=80=81=E7=9A=84=E5=9B=BE=E7=89=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/app/api/idl/bilibili_picture.go | 26 ++++ internal/app/spider/picture.go | 
167 +++++++++++++++++++++++ internal/pkg/bilibili/video.go | 20 +-- internal/repository/bilibili_picture.go | 60 ++++++++ 4 files changed, 263 insertions(+), 10 deletions(-) create mode 100644 internal/app/api/idl/bilibili_picture.go create mode 100644 internal/app/spider/picture.go create mode 100644 internal/repository/bilibili_picture.go diff --git a/internal/app/api/idl/bilibili_picture.go b/internal/app/api/idl/bilibili_picture.go new file mode 100644 index 0000000..0101591 --- /dev/null +++ b/internal/app/api/idl/bilibili_picture.go @@ -0,0 +1,26 @@ +package idl + +type BilibiliPicture struct { + ID uint64 `gorm:"primarykey"` + Url string `gorm:"column:url"` + DynamicID uint64 `gorm:"column:dynamic_id"` + TopicName string `gorm:"column:topic_name"` + SentAt uint64 `gorm:"column:sent_at"` + CreatedAt uint64 `gorm:"autoCreateTime"` + UpdatedAt uint64 `gorm:"autoUpdateTime"` +} + +type BilibiliPictureDTO struct { + ID uint64 `json:"id"` + Url string `json:"url"` + CreatedAt uint64 `json:"created_at"` +} + +func (BilibiliPicture) TableName() string { + return "bilibili_pictures" +} + +type BilibiliPictureRepository interface { + Create(items []*BilibiliPicture) error + FindMaxDynamicID(topicName string) (*uint64, error) +} diff --git a/internal/app/spider/picture.go b/internal/app/spider/picture.go new file mode 100644 index 0000000..ea6c0ca --- /dev/null +++ b/internal/app/spider/picture.go @@ -0,0 +1,167 @@ +package spider + +import ( + "context" + "encoding/json" + "fmt" + "strconv" + "time" + + "git.vtb.link/eoefans/internal/app/api/idl" + "git.vtb.link/eoefans/internal/pkg/bilibili" + "git.vtb.link/eoefans/internal/repository" + "github.com/pkg/errors" + "go.uber.org/zap" + "gorm.io/gorm" +) + +type Picture struct { + stopChan chan bool + db *gorm.DB + logger *zap.Logger + sdk *bilibili.SDK + isRunning bool +} + +func NewPicture(db *gorm.DB, logger *zap.Logger, sdk *bilibili.SDK) *Picture { + return &Picture{ + stopChan: make(chan bool), + db: db, + logger: 
logger, + sdk: sdk, + } +} + +func (v *Picture) Stop(ctx context.Context) error { + v.logger.Info("stopping spider server") + + for { + select { + case <-ctx.Done(): + return errors.New("shutdown spider server timeout") + default: + if err := v.stop(); err != nil { + return errors.Wrap(err, "shutdown spider server error") + } + return nil + } + } +} + +func (v *Picture) stop() error { + v.stopChan <- true + v.isRunning = false + return nil +} + +func (v *Picture) Run(ctx context.Context) error { + tk := time.NewTicker(60 * time.Minute) + v.isRunning = true + + go func() { + if err := v.spider(); err != nil { + v.logger.Error("start spider server error", zap.Error(err)) + } + }() + + go func(_tk *time.Ticker) { + for { + select { + case <-_tk.C: + v.logger.Info("[tick] picture spider", zap.Time("time", time.Now())) + if err := v.spider(); err != nil { + v.logger.Error("start picture server error", zap.Error(err)) + } + case <-v.stopChan: + return + } + } + }(tk) + + return nil +} + +func (p *Picture) spider() error { + //把当前数据库最大的动态ID查出来 + //调用接口,将大于当前动态ID的都入DB,如果存在小于的,则可提前结束,尽量保证没有重复数据 + topics := []string{ + bilibili.TopicNameGoGo, + bilibili.TopicNameMino, + bilibili.TopicNameUn, + bilibili.TopicNameMoMo, + bilibili.TopicNameWan, + bilibili.TopicNameEOE, + } + for _, topic := range topics { + curMaxDynamicID, err := repository.NewBilibiliPicture(p.db).FindMaxDynamicID(topic) + if err != nil { + p.logger.Error("FindMaxDynamicID error", zap.String("topic_name", topic), zap.Error(err)) + continue + } + var hasMore uint = 1 + var offset uint64 = 0 + exist := false //判断有没有已经爬过 + for hasMore == 1 && !exist { + data, err := p.sdk.TopicDynamic(topic, offset) + if err != nil { + p.logger.Error("TopicDynamic error", zap.String("topic_name", topic), zap.String("offset", fmt.Sprintf("%d", offset)), zap.Error(err)) + time.Sleep(500 * time.Millisecond) + break + } + hasMore = data.HasMore + dynamicID, err := strconv.ParseUint(data.Offset, 10, 64) + if err == nil { + offset = 
dynamicID + } else { + hasMore = 0 + } + items := make([]*idl.BilibiliPicture, 0) + for _, v := range data.Cards { + switch v.Desc.Type { + case bilibili.DynamicDraw: + dynamicID, _ := strconv.ParseUint(v.Desc.DynamicId, 10, 64) + if dynamicID <= *curMaxDynamicID { + //后面所有的都是爬过的,提前结束,后续也不再请求api + exist = true + break + } + pictures, err := parsePicturesFromCard(v.Card) + if err != nil { + p.logger.Error("ParsePicturesFromCard error", zap.String("topic_name", topic), zap.String("offset", fmt.Sprintf("%d", offset)), zap.Error(err)) + continue + } + for _, url := range pictures { + items = append(items, &idl.BilibiliPicture{ + Url: url, + DynamicID: dynamicID, + TopicName: topic, + SentAt: v.Desc.TimeStamp, + }) + } + default: + continue + } + } + //插入数据 + if len(items) != 0 { + err := repository.NewBilibiliPicture(p.db).Create(items) + if err != nil { + p.logger.Error("Create bilibli_pictures error", zap.String("topic_name", topic), zap.String("offset", fmt.Sprintf("%d", offset)), zap.Error(err)) + } + } + } + } + return nil +} + +func parsePicturesFromCard(data string) ([]string, error) { + var content bilibili.DynamicCardContent + if err := json.Unmarshal([]byte(data), &content); err != nil { + return nil, err + } + pics := make([]string, 0, len(content.Item.Pictures)) + for _, v := range content.Item.Pictures { + pics = append(pics, v.ImgSrc) + } + return pics, nil +} diff --git a/internal/pkg/bilibili/video.go b/internal/pkg/bilibili/video.go index 837189c..a5b82dc 100644 --- a/internal/pkg/bilibili/video.go +++ b/internal/pkg/bilibili/video.go @@ -1,7 +1,6 @@ package bilibili import ( - "encoding/json" "fmt" "net/http" "net/url" @@ -28,15 +27,14 @@ type DynamicType uint const ( DynamicDraw DynamicType = 2 //图片动态 ) - const ( //topicHistory用topic_id查出来的数据有问题,故暂时用topic_name - topNameWan = "小莞熊在这里" - topNameUn = "柚恩的蜜罐子" - topNameGoGo = "GOGO队立大功!" 
//中文感叹号 - topNameMoMo = "虞你在一起" - topNameMino = "和米诺的对抗路日常" - topNameEOE = "EOE的魔法盒" + TopicNameWan = "小莞熊在这里" + TopicNameUn = "柚恩的蜜罐子" + TopicNameGoGo = "GOGO队立大功!" //中文感叹号 + TopicNameMoMo = "虞你在一起" + TopicNameMino = "和米诺的对抗路日常" + TopicNameEOE = "EOE的魔法盒" ) type SDK struct { @@ -62,9 +60,11 @@ type DynamicInfo struct { type DynamicCard struct { Desc struct { - Type DynamicType `json:"type"` + Type DynamicType `json:"type"` + DynamicId string `json:"dynamic_id"` + TimeStamp uint64 `json:"timestamp"` } `json:"desc"` - Card json.RawMessage `json:"card"` + Card string `json:"card"` } // Card是json字符串,需要进一步解析 diff --git a/internal/repository/bilibili_picture.go b/internal/repository/bilibili_picture.go new file mode 100644 index 0000000..5d63380 --- /dev/null +++ b/internal/repository/bilibili_picture.go @@ -0,0 +1,60 @@ +package repository + +import ( + "git.vtb.link/eoefans/internal/app/api/idl" + "gorm.io/gorm" +) + +func NewBilibiliPicture(tx *gorm.DB) idl.BilibiliPictureRepository { + return &BilibiliPictureMysqlImpl{tx: tx} +} + +type BilibiliPictureMysqlImpl struct { + tx *gorm.DB +} + +func (impl *BilibiliPictureMysqlImpl) Create(items []*idl.BilibiliPicture) error { + if len(items) == 0 { + return nil + } + //针对url去重 + return impl.tx.Transaction(func(_tx *gorm.DB) error { + urls := make([]string, 0, len(items)) + for _, v := range items { + urls = append(urls, v.Url) + } + var exist []*idl.BilibiliPicture + err := _tx.Table(idl.BilibiliPicture{}.TableName()).Where("url in (?)", urls).Distinct("url").Find(&exist).Error + if err != nil { + return err + } + filter := make([]*idl.BilibiliPicture, 0) + for i := range items { + find := false + for j := range exist { + if items[i].Url == exist[j].Url { + find = true + break + } + } + if !find { + filter = append(filter, items[i]) + } + } + err = _tx.Table(idl.BilibiliPicture{}.TableName()).Create(&filter).Error + if err != nil { + return err + } + return nil + }) +} + +func (impl *BilibiliPictureMysqlImpl) 
FindMaxDynamicID(topicName string) (*uint64, error) { + var id uint64 + conn := impl.tx.Table(idl.BilibiliPicture{}.TableName()) + err := conn.Select("max(dynamic_id) as id").Where("topic_name = ?", topicName).Group("dynamic_id").Scan(&id).Error + if err != nil { + return nil, err + } + return &id, nil +} From 2cef8ca20ef17ddba830c93974ef430074986497 Mon Sep 17 00:00:00 2001 From: runstp Date: Mon, 16 Jan 2023 22:05:44 +0800 Subject: [PATCH 3/6] =?UTF-8?q?[feature]=20=E6=96=B0=E5=A2=9Epicture?= =?UTF-8?q?=E5=88=B0=E5=90=AF=E5=8A=A8=E9=A1=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cmd/spider/main.go | 22 +++++++++++++++++++++- internal/app/spider/picture.go | 28 ++++++++++++++-------------- 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/cmd/spider/main.go b/cmd/spider/main.go index 6a8c1ad..5abf576 100644 --- a/cmd/spider/main.go +++ b/cmd/spider/main.go @@ -22,13 +22,21 @@ func newSpider() fx.Option { video_analysis.Provide(), fx.Provide(spider.NewVideo), fx.Provide(spider.NewUpdate), + fx.Provide(spider.NewPicture), fx.Provide(bilibili.NewSDK), fx.Provide(health.NewCheckServer), fx.Invoke(lc), ) } -func lc(lifecycle fx.Lifecycle, spiderVideo *spider.Video, spiderUpdate *spider.Update, checkServer *health.CheckServer, shutdown fx.Shutdowner) { +func lc( + lifecycle fx.Lifecycle, + spiderVideo *spider.Video, + spiderUpdate *spider.Update, + spiderPicture *spider.Picture, + checkServer *health.CheckServer, + shutdown fx.Shutdowner, +) { lifecycle.Append(fx.Hook{ OnStart: func(ctx context.Context) error { return spiderVideo.Run(ctx) @@ -53,6 +61,18 @@ func lc(lifecycle fx.Lifecycle, spiderVideo *spider.Video, spiderUpdate *spider. 
}, }) + lifecycle.Append(fx.Hook{ + OnStart: func(ctx context.Context) error { + return spiderPicture.Run(ctx) + }, + OnStop: func(ctx context.Context) error { + if err := spiderPicture.Stop(ctx); err != nil { + return err + } + return shutdown.Shutdown() + }, + }) + lifecycle.Append(fx.Hook{ OnStart: func(ctx context.Context) error { go func() { diff --git a/internal/app/spider/picture.go b/internal/app/spider/picture.go index ea6c0ca..013365b 100644 --- a/internal/app/spider/picture.go +++ b/internal/app/spider/picture.go @@ -32,15 +32,15 @@ func NewPicture(db *gorm.DB, logger *zap.Logger, sdk *bilibili.SDK) *Picture { } } -func (v *Picture) Stop(ctx context.Context) error { - v.logger.Info("stopping spider server") +func (p *Picture) Stop(ctx context.Context) error { + p.logger.Info("stopping spider server") for { select { case <-ctx.Done(): return errors.New("shutdown spider server timeout") default: - if err := v.stop(); err != nil { + if err := p.stop(); err != nil { return errors.Wrap(err, "shutdown spider server error") } return nil @@ -48,19 +48,19 @@ func (v *Picture) Stop(ctx context.Context) error { } } -func (v *Picture) stop() error { - v.stopChan <- true - v.isRunning = false +func (p *Picture) stop() error { + p.stopChan <- true + p.isRunning = false return nil } -func (v *Picture) Run(ctx context.Context) error { +func (p *Picture) Run(ctx context.Context) error { tk := time.NewTicker(60 * time.Minute) - v.isRunning = true + p.isRunning = true go func() { - if err := v.spider(); err != nil { - v.logger.Error("start spider server error", zap.Error(err)) + if err := p.spider(); err != nil { + p.logger.Error("start spider server error", zap.Error(err)) } }() @@ -68,11 +68,11 @@ func (v *Picture) Run(ctx context.Context) error { for { select { case <-_tk.C: - v.logger.Info("[tick] picture spider", zap.Time("time", time.Now())) - if err := v.spider(); err != nil { - v.logger.Error("start picture server error", zap.Error(err)) + p.logger.Info("[tick] 
picture spider", zap.Time("time", time.Now())) + if err := p.spider(); err != nil { + p.logger.Error("start picture server error", zap.Error(err)) } - case <-v.stopChan: + case <-p.stopChan: return } } From 4c15ebc373a7a03716532e4301a462fe8a70c657 Mon Sep 17 00:00:00 2001 From: bianjiajie Date: Sat, 21 Jan 2023 15:28:00 +0800 Subject: [PATCH 4/6] =?UTF-8?q?[feature]=20=E5=8A=A8=E6=80=81=E5=9B=BE?= =?UTF-8?q?=E7=89=87=E7=88=AC=E8=99=AB=E4=B8=8Eapi?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cmd/spider/main.go | 2 + database/init.sql | 18 ++++ internal/app/api/handler/bilbil_picture.go | 40 ++++++++ internal/app/api/idl/bilibili_picture.go | 96 ++++++++++++++++--- internal/app/api/router/router.go | 10 +- internal/app/api/service/bilbil_picture.go | 64 +++++++++++++ internal/app/provide.go | 1 + internal/app/spider/picture.go | 65 +++++++------ internal/app/spider/update_dynamic.go | 104 +++++++++++++++++++++ internal/pkg/bilibili/video.go | 26 +++++- internal/repository/bilibili_picture.go | 85 +++++++++++------ 11 files changed, 434 insertions(+), 77 deletions(-) create mode 100644 internal/app/api/handler/bilbil_picture.go create mode 100644 internal/app/api/service/bilbil_picture.go create mode 100644 internal/app/spider/update_dynamic.go diff --git a/cmd/spider/main.go b/cmd/spider/main.go index 5abf576..0100d9f 100644 --- a/cmd/spider/main.go +++ b/cmd/spider/main.go @@ -23,6 +23,7 @@ func newSpider() fx.Option { fx.Provide(spider.NewVideo), fx.Provide(spider.NewUpdate), fx.Provide(spider.NewPicture), + fx.Provide(spider.NewUpdateDynamic), fx.Provide(bilibili.NewSDK), fx.Provide(health.NewCheckServer), fx.Invoke(lc), @@ -34,6 +35,7 @@ func lc( spiderVideo *spider.Video, spiderUpdate *spider.Update, spiderPicture *spider.Picture, + spiderUpdatePicture *spider.UpdateDynamic, checkServer *health.CheckServer, shutdown fx.Shutdowner, ) { diff --git a/database/init.sql b/database/init.sql index 0b17e63..b79bc8c 
100644
--- a/database/init.sql
+++ b/database/init.sql
@@ -109,3 +109,21 @@ INSERT INTO video_analysis (type, `key`, score) VALUES ('tag', '柚恩', 80);
 INSERT INTO video_analysis (type, `key`, score) VALUES ('tag', '柚恩不加糖', 100);
 INSERT INTO video_analysis (type, `key`, score) VALUES ('tag', 'EOE', 80);
 INSERT INTO video_analysis (type, `key`, score) VALUES ('tag', 'EOE组合', 100);
+
+create table bilibili_dynamics (
+    id bigint unsigned not null auto_increment primary key comment 'id',
+    uid bigint unsigned not null comment 'B站用户id',
+    dynamic_id bigint unsigned not null comment 'B站动态id',
+    pictures json not null comment '图片',
+    topic_name varchar(64) comment '话题名称',
+    topic_id bigint unsigned not null comment '话题id',
+    view_nums bigint unsigned not null default '0' comment '看过的数量',
+    repost bigint unsigned not null default '0' comment '转发数量',
+    comment_nums bigint unsigned not null default '0' comment '评论数量',
+    favor bigint unsigned not null default '0' comment '点赞数量',
+    sent_at bigint unsigned not null comment '动态发生时间',
+    created_at bigint unsigned not null comment '创建时间',
+    updated_at bigint unsigned not null comment '更新时间',
+    index idx_dynamic_id(dynamic_id) comment '动态id索引',
+    index idx_sent_at(sent_at) comment '发送时间索引'
+)Engine=InnoDB comment '动态' charset 'utf8mb4';
\ No newline at end of file
diff --git a/internal/app/api/handler/bilbil_picture.go b/internal/app/api/handler/bilbil_picture.go
new file mode 100644
index 0000000..7649a37
--- /dev/null
+++ b/internal/app/api/handler/bilbil_picture.go
@@ -0,0 +1,54 @@
+package handler
+
+import (
+	"net/http"
+
+	"git.vtb.link/eoefans/internal/app/api/apperrors"
+	"git.vtb.link/eoefans/internal/app/api/help"
+	"git.vtb.link/eoefans/internal/app/api/idl"
+	"git.vtb.link/eoefans/internal/app/api/service"
+	"github.com/gin-gonic/gin"
+)
+
+// BilibiliLatestPics returns a gin handler that binds the query string into a
+// BilibiliPictureLatestReq, calls s.Latest and writes the result as JSON with
+// HTTP 200. Binding and service errors are recorded on the gin context.
+func BilibiliLatestPics(s *service.BilbilPicture) func(ctx *gin.Context) {
+	return func(ctx *gin.Context) {
+		var req idl.BilibiliPictureLatestReq
+		if err := ctx.ShouldBindQuery(&req); err != nil {
+			_ = ctx.Error(apperrors.NewValidationError(400, err.Error()).Wrap(err))
+			return
+		}
+
+		if resp, err := s.Latest(ctx, req); err != nil {
+			_ = ctx.Error(err)
+			return
+		} else {
+			ctx.JSON(http.StatusOK, help.SuccessJson(resp))
+		}
+	}
+}
+
+// BilibiliRecommendPics returns a gin handler that binds the query string into
+// a BilibiliPictureRecommendReq, calls s.Recommend and writes the result as
+// JSON with HTTP 200. Binding and service errors are recorded on the gin
+// context.
+func BilibiliRecommendPics(s *service.BilbilPicture) func(ctx *gin.Context) {
+	return func(ctx *gin.Context) {
+		var req idl.BilibiliPictureRecommendReq
+		// Bind before calling the service; without this req.TopicID is always
+		// zero and the topic_id query parameter is ignored.
+		if err := ctx.ShouldBindQuery(&req); err != nil {
+			_ = ctx.Error(apperrors.NewValidationError(400, err.Error()).Wrap(err))
+			return
+		}
+
+		if resp, err := s.Recommend(ctx, req); err != nil {
+			_ = ctx.Error(err)
+			return
+		} else {
+			ctx.JSON(http.StatusOK, help.SuccessJson(resp))
+		}
+	}
+}
diff --git a/internal/app/api/idl/bilibili_picture.go b/internal/app/api/idl/bilibili_picture.go
index 0101591..52d09da 100644
--- a/internal/app/api/idl/bilibili_picture.go
+++ b/internal/app/api/idl/bilibili_picture.go
@@ -1,26 +1,92 @@
 package idl
 
-type BilibiliPicture struct {
-	ID        uint64 `gorm:"primarykey"`
-	Url       string `gorm:"column:url"`
-	DynamicID uint64 `gorm:"column:dynamic_id"`
-	TopicName string `gorm:"column:topic_name"`
-	SentAt    uint64 `gorm:"column:sent_at"`
-	CreatedAt uint64 `gorm:"autoCreateTime"`
-	UpdatedAt uint64 `gorm:"autoUpdateTime"`
+import (
+	"database/sql/driver"
+	"encoding/json"
+	"errors"
+	"time"
+)
+
+// Pictures originate from dynamics, so the dynamic is the unit of storage.
+type BilibiliDynamic struct {
+	ID        uint64                  `gorm:"primarykey"`
+	UID       uint64                  `gorm:"column:uid"`
+	DynamicID uint64                  `gorm:"column:dynamic_id"`
+	Pictures  BilibiliDynamicPictures `gorm:"column:pictures;"`
+	TopicName string                  `gorm:"column:topic_name"`
+	TopicID   uint64                  `gorm:"column:topic_id"`
+	View      uint64                  `gorm:"column:view_nums"`
+	Repost    uint64                  `gorm:"column:repost"`
+	Comment   uint64                  `gorm:"column:comment_nums"`
+	Like      uint64                  `gorm:"column:favor"`
+	SentAt    uint64                  `gorm:"column:sent_at"`
+	CreatedAt uint64                  `gorm:"autoCreateTime"`
+	UpdatedAt uint64                  `gorm:"autoUpdateTime"`
+}
+
+func (p BilibiliDynamicPictures) Value() (driver.Value, error) {
+	return json.Marshal(p)
+}
+
+func (c *BilibiliDynamicPictures) Scan(input interface{}) error {
+	data, ok := input.([]byte)
+	if !ok {
+		return errors.New("invalid input in Scan")
+	}
+	result := BilibiliDynamicPictures{}
+	err := json.Unmarshal(data, &result)
+	if err != nil {
+		return err
+	}
+	*c = result
+	return nil
+}
+
+type BilibiliPictureLatestReq struct {
+	Page    int `form:"page,default=1" binding:"omitempty,gt=0"`
+	TopicID int `form:"topic_id"`
+}
+
+type BilibiliPictureRecommendReq struct {
+	TopicID int `form:"topic_id"`
+}
+type BilibiliPicturesCommonResp struct {
+	Result []*BilibiliDynamicDTO `json:"result"`
+}
+type BilibiliPicturesLatestResp struct {
+	BilibiliPicturesCommonResp
+	Page  int `json:"page"`
+	Total int `json:"total"`
+}
+
+type BilibiliPicturesRecommendResp struct {
+	BilibiliPicturesCommonResp
+	Total int `json:"total"`
+}
+type BilibiliDynamicDTO struct {
+	DynamicID uint64                  `json:"dynamic_id"`
+	Pictures  BilibiliDynamicPictures `json:"pictures"`
+	SentAt    uint64                  `json:"sent_at"`
 }
 
-type BilibiliPictureDTO struct {
-	ID        uint64 `json:"id"`
-	Url       string `json:"url"`
-	CreatedAt uint64 `json:"created_at"`
+type BilibiliDynamicPictures []BilibiliDynamicPicture
+type BilibiliDynamicPicture struct {
+	Height float64 `json:"img_height"`
+	Size   float64 `json:"img_size"`
+	Width  float64 `json:"img_width"`
+	ImgSrc string  `json:"image_src"`
 }
 
-func (BilibiliPicture) TableName() string {
-	return "bilibili_pictures"
+func (BilibiliDynamic) TableName() string {
+	return "bilibili_dynamics"
 }
 
 type BilibiliPictureRepository interface {
-	Create(items []*BilibiliPicture) error
+	Create(items []*BilibiliDynamic) error
 	FindMaxDynamicID(topicName string) (*uint64, error)
+	Update(updates map[string]interface{}, dynamicID uint64) error
+	FindAllByPubDate(from, to time.Time, page, size int64) (list []*BilibiliDynamic, err error)
+	Latest(page, size, topicID int) (list []*BilibiliDynamic, err error)
+	// Recommendations default to 50 items for now.
+	Recommend(from, to time.Time, size, topicID int) (list []*BilibiliDynamic, err error)
 }
diff --git
a/internal/app/api/router/router.go b/internal/app/api/router/router.go index c4ccbe3..fee7fcb 100644 --- a/internal/app/api/router/router.go +++ b/internal/app/api/router/router.go @@ -15,6 +15,7 @@ func Provide() fx.Option { func InitRouters( bvService *service.BilbilVideo, + picService *service.BilbilPicture, authService *service.Auth, userService *service.User, errMiddlewares *middlewares.ErrorInterceptor, @@ -24,9 +25,14 @@ func InitRouters( // http 异常处理 r.Use(errMiddlewares.Handler) - // 视频搜素 + // 视频搜索 r.GET("/v1/video-interface/advanced-search", handler.BilibiliVideoSearch(bvService)) - + //图片 + picApi := r.Group("/v1/pic") + { + picApi.GET("/latest", handler.BilibiliLatestPics(picService)) + picApi.GET("/recommend", handler.BilibiliRecommendPics(picService)) + } // Auth相关 authApi := r.Group("/v1/auth") { diff --git a/internal/app/api/service/bilbil_picture.go b/internal/app/api/service/bilbil_picture.go new file mode 100644 index 0000000..d460ce3 --- /dev/null +++ b/internal/app/api/service/bilbil_picture.go @@ -0,0 +1,64 @@ +package service + +import ( + "context" + "time" + + "git.vtb.link/eoefans/internal/app/api/idl" + "git.vtb.link/eoefans/internal/repository" + "gorm.io/gorm" +) + +const ( + picRecommendDefaultSize = 50 +) + +type BilbilPicture struct { + db *gorm.DB +} + +func NewBilibiliPicture(db *gorm.DB) *BilbilPicture { + return &BilbilPicture{db: db} +} + +func (service *BilbilPicture) Latest(ctx context.Context, req idl.BilibiliPictureLatestReq) (*idl.BilibiliPicturesLatestResp, error) { + tx := service.db.WithContext(ctx) + picRepository := repository.NewBilibiliPicture(tx) + list, err := picRepository.Latest(req.Page, defaultQuerySize, req.TopicID) + if err != nil { + return nil, err + } + resp := idl.BilibiliPicturesLatestResp{ + Page: req.Page, + Total: len(list), + } + for i := range list { + resp.Result = append(resp.Result, &idl.BilibiliDynamicDTO{ + DynamicID: list[i].DynamicID, + Pictures: list[i].Pictures, + SentAt: list[i].SentAt, + 
}) + } + return &resp, nil +} + +func (service *BilbilPicture) Recommend(ctx context.Context, req idl.BilibiliPictureRecommendReq) (*idl.BilibiliPicturesRecommendResp, error) { + tx := service.db.WithContext(ctx) + picRepository := repository.NewBilibiliPicture(tx) + now := time.Now() + list, err := picRepository.Recommend(now.Add(-(3 * 24 * time.Hour)), now, picRecommendDefaultSize, req.TopicID) + if err != nil { + return nil, err + } + resp := idl.BilibiliPicturesRecommendResp{ + Total: len(list), + } + for i := range list { + resp.Result = append(resp.Result, &idl.BilibiliDynamicDTO{ + DynamicID: list[i].DynamicID, + Pictures: list[i].Pictures, + SentAt: list[i].SentAt, + }) + } + return &resp, nil +} diff --git a/internal/app/provide.go b/internal/app/provide.go index 60ddac0..2200e04 100644 --- a/internal/app/provide.go +++ b/internal/app/provide.go @@ -33,6 +33,7 @@ func MiddlewareProvider() fx.Option { func ServiceProvider() fx.Option { return fx.Provide( service.NewBilbilVideo, + service.NewBilibiliPicture, service.NewAuth, service.NewUser, ) diff --git a/internal/app/spider/picture.go b/internal/app/spider/picture.go index 013365b..30ca805 100644 --- a/internal/app/spider/picture.go +++ b/internal/app/spider/picture.go @@ -84,27 +84,27 @@ func (p *Picture) Run(ctx context.Context) error { func (p *Picture) spider() error { //把当前数据库最大的动态ID查出来 //调用接口,将大于当前动态ID的都入DB,如果存在小于的,则可提前结束,尽量保证没有重复数据 - topics := []string{ - bilibili.TopicNameGoGo, - bilibili.TopicNameMino, - bilibili.TopicNameUn, - bilibili.TopicNameMoMo, - bilibili.TopicNameWan, - bilibili.TopicNameEOE, + topicsMap := map[string]uint64{ + bilibili.TopicNameGoGo: bilibili.TopicIDGoGo, + bilibili.TopicNameMino: bilibili.TopicIDMino, + bilibili.TopicNameUn: bilibili.TopicIDUn, + bilibili.TopicNameMoMo: bilibili.TopicIDMoMo, + bilibili.TopicNameWan: bilibili.TopicIDWan, + bilibili.TopicNameEOE: bilibili.TopicIDEOE, } - for _, topic := range topics { - curMaxDynamicID, err := 
repository.NewBilibiliPicture(p.db).FindMaxDynamicID(topic) + for topicName, topicID := range topicsMap { + curMaxDynamicID, err := repository.NewBilibiliPicture(p.db).FindMaxDynamicID(topicName) if err != nil { - p.logger.Error("FindMaxDynamicID error", zap.String("topic_name", topic), zap.Error(err)) + p.logger.Error("FindMaxDynamicID error", zap.String("topic_name", topicName), zap.Error(err)) continue } var hasMore uint = 1 var offset uint64 = 0 exist := false //判断有没有已经爬过 for hasMore == 1 && !exist { - data, err := p.sdk.TopicDynamic(topic, offset) + data, err := p.sdk.TopicDynamics(topicName, offset) if err != nil { - p.logger.Error("TopicDynamic error", zap.String("topic_name", topic), zap.String("offset", fmt.Sprintf("%d", offset)), zap.Error(err)) + p.logger.Error("TopicDynamics error", zap.String("topic_name", topicName), zap.String("offset", fmt.Sprintf("%d", offset)), zap.Error(err)) time.Sleep(500 * time.Millisecond) break } @@ -115,29 +115,37 @@ func (p *Picture) spider() error { } else { hasMore = 0 } - items := make([]*idl.BilibiliPicture, 0) + items := make([]*idl.BilibiliDynamic, 0) for _, v := range data.Cards { switch v.Desc.Type { case bilibili.DynamicDraw: - dynamicID, _ := strconv.ParseUint(v.Desc.DynamicId, 10, 64) + dynamicID, _ := strconv.ParseUint(v.Desc.DynamicID, 10, 64) if dynamicID <= *curMaxDynamicID { //后面所有的都是爬过的,提前结束,后续也不再请求api exist = true break } + dynamic := &idl.BilibiliDynamic{ + UID: v.Desc.UID, + DynamicID: dynamicID, + TopicName: topicName, + TopicID: topicID, + View: v.Desc.View, + Repost: v.Desc.Repost, + Comment: v.Desc.Comment, + Like: v.Desc.Like, + SentAt: v.Desc.TimeStamp, + } pictures, err := parsePicturesFromCard(v.Card) if err != nil { - p.logger.Error("ParsePicturesFromCard error", zap.String("topic_name", topic), zap.String("offset", fmt.Sprintf("%d", offset)), zap.Error(err)) + p.logger.Error("ParsePicturesFromCard error", zap.String("topic_name", topicName), zap.String("offset", fmt.Sprintf("%d", offset)), 
zap.Error(err)) continue } - for _, url := range pictures { - items = append(items, &idl.BilibiliPicture{ - Url: url, - DynamicID: dynamicID, - TopicName: topic, - SentAt: v.Desc.TimeStamp, - }) + if len(pictures) == 0 { + continue } + dynamic.Pictures = pictures + items = append(items, dynamic) default: continue } @@ -146,7 +154,7 @@ func (p *Picture) spider() error { if len(items) != 0 { err := repository.NewBilibiliPicture(p.db).Create(items) if err != nil { - p.logger.Error("Create bilibli_pictures error", zap.String("topic_name", topic), zap.String("offset", fmt.Sprintf("%d", offset)), zap.Error(err)) + p.logger.Error("Create bilibli_pictures error", zap.String("topic_name", topicName), zap.String("offset", fmt.Sprintf("%d", offset)), zap.Error(err)) } } } @@ -154,14 +162,19 @@ func (p *Picture) spider() error { return nil } -func parsePicturesFromCard(data string) ([]string, error) { +func parsePicturesFromCard(data string) ([]idl.BilibiliDynamicPicture, error) { var content bilibili.DynamicCardContent if err := json.Unmarshal([]byte(data), &content); err != nil { return nil, err } - pics := make([]string, 0, len(content.Item.Pictures)) + pics := make([]idl.BilibiliDynamicPicture, 0, len(content.Item.Pictures)) for _, v := range content.Item.Pictures { - pics = append(pics, v.ImgSrc) + pics = append(pics, idl.BilibiliDynamicPicture{ + Height: v.Height, + Size: v.Size, + Width: v.Width, + ImgSrc: v.ImgSrc, + }) } return pics, nil } diff --git a/internal/app/spider/update_dynamic.go b/internal/app/spider/update_dynamic.go new file mode 100644 index 0000000..76986ab --- /dev/null +++ b/internal/app/spider/update_dynamic.go @@ -0,0 +1,104 @@ +package spider + +import ( + "context" + + "git.vtb.link/eoefans/internal/pkg/bilibili" + "git.vtb.link/eoefans/internal/repository" + + "time" + + "github.com/pkg/errors" + "go.uber.org/zap" + "gorm.io/gorm" +) + +type UpdateDynamic struct { + stopChan chan bool + db *gorm.DB + logger *zap.Logger + sdk *bilibili.SDK +} + 
+func NewUpdateDynamic(db *gorm.DB, logger *zap.Logger, sdk *bilibili.SDK) *UpdateDynamic { // constructor for the type whose Stop/Run/spider methods follow
+	return &UpdateDynamic{
+		stopChan: make(chan bool),
+		db:       db,
+		logger:   logger,
+		sdk:      sdk,
+	}
+}
+
+func (u *UpdateDynamic) Stop(ctx context.Context) error { // closes stopChan unless ctx is already done
+	u.logger.Info("stopping spider server")
+
+	for {
+		select {
+		case <-ctx.Done():
+			return errors.New("shutdown dynamic update spider server timeout")
+		default:
+			close(u.stopChan)
+			return nil
+		}
+	}
+}
+
+func (u *UpdateDynamic) Run(ctx context.Context) error { // runs spider once in the background, then on every 180-minute tick until stopChan fires
+	go func() {
+		if err := u.spider(); err != nil {
+			u.logger.Error("start dynamic update error", zap.Error(err))
+		}
+	}()
+
+	tk := time.NewTicker(180 * time.Minute)
+	go func(_tk *time.Ticker) {
+		for {
+			select {
+			case <-_tk.C:
+				u.logger.Info("[tick] dynamic update spider", zap.Time("time", time.Now()))
+				if err := u.spider(); err != nil {
+					u.logger.Error("start dynamic update error", zap.Error(err))
+				}
+			case <-u.stopChan:
+				return
+			}
+		}
+	}(tk)
+
+	return nil
+}
+
+func (u *UpdateDynamic) spider() error { // pages through FindAllByPubDate for the last 3 days, 100 rows per page, and rewrites each row's counters
+	tx := u.db.WithContext(context.TODO())
+	repo := repository.NewBilibiliPicture(tx)
+
+	size := 100
+	for p := 1; ; p++ {
+		list, err := repo.FindAllByPubDate(time.Now().Add(-(3 * 24 * time.Hour)), time.Now(), int64(p), int64(size))
+		if err != nil {
+			u.logger.Error("[UpdateDynamic spider()]FindAllByPubDate error", zap.Int("page", p), zap.Error(err))
+			return nil // already logged; end this round without reporting an error to the caller
+		}
+		for _, v := range list {
+			dynamicCard, err := u.sdk.Dynamic(v.DynamicID)
+			if err != nil {
+				u.logger.Error("Dynamic error", zap.Uint64("dynamic_id", v.DynamicID), zap.Error(err))
+				continue
+			}
+			updates := map[string]interface{}{ // NOTE(review): init.sql names these columns view_nums / comment_nums / favor — confirm repo.Update maps the keys
+				"view":    dynamicCard.Desc.View,
+				"repost":  dynamicCard.Desc.Repost,
+				"comment": dynamicCard.Desc.Comment,
+				"like":    dynamicCard.Desc.Like,
+			}
+			if err := repo.Update(updates, v.DynamicID); err != nil {
+				u.logger.Error("Dynamic Update error", zap.Uint64("dynamic_id", v.DynamicID), zap.Error(err))
+			}
+		}
+		if len(list) < size { // a short page is the last page
+			break
+		}
+	}
+
+	return nil
+}
diff --git a/internal/pkg/bilibili/video.go b/internal/pkg/bilibili/video.go
index a5b82dc..8d8b3b8 100644
--- a/internal/pkg/bilibili/video.go
+++ b/internal/pkg/bilibili/video.go
@@ -16,6 +16,7 @@ const (
 	webVideoInfoURL    = "https://api.bilibili.com/x/web-interface/view?bvid=%s"
 	webVideoTagInfoURL = "https://api.bilibili.com/x/web-interface/view/detail/tag?aid=%s"
 	topicHistory       = "https://api.vc.bilibili.com/topic_svr/v1/topic_svr/topic_history?offset_dynamic_id=%d&"
+	dynamic            = "https://api.vc.bilibili.com/dynamic_svr/v1/dynamic_svr/get_dynamic_detail?dynamic_id=%d"
 )
 
 const (
@@ -35,6 +36,13 @@ const (
 	TopicNameMoMo = "虞你在一起"
 	TopicNameMino = "和米诺的对抗路日常"
 	TopicNameEOE  = "EOE的魔法盒"
+
+	TopicIDWan  uint64 = 28953983
+	TopicIDUn   uint64 = 28950030
+	TopicIDGoGo uint64 = 29067608
+	TopicIDMoMo uint64 = 28948378
+	TopicIDMino uint64 = 29069147
+	TopicIDEOE  uint64 = 29156150
 )
 
 type SDK struct {
@@ -61,7 +69,12 @@ type DynamicInfo struct {
 type DynamicCard struct {
 	Desc struct {
 		Type      DynamicType `json:"type"`
-		DynamicId string      `json:"dynamic_id"`
+		View      uint64      `json:"view"`
+		Repost    uint64      `json:"repost"`
+		Comment   uint64      `json:"comment"`
+		Like      uint64      `json:"like"`
+		UID       uint64      `json:"uid"`
+		DynamicID string      `json:"dynamic_id"`
 		TimeStamp uint64      `json:"timestamp"`
 	} `json:"desc"`
 	Card string `json:"card"`
@@ -337,12 +350,19 @@ func (sdk *SDK) VideoWebTagInfo(aid string) (data *VideoTagResponse, err error)
 	return data, nil
 }
 
-func (sdk *SDK) TopicDynamic(topicName string, offsetDynamicId uint64) (data *DynamicInfo, err error) {
+func (sdk *SDK) TopicDynamics(topicName string, offsetDynamicId uint64) (data *DynamicInfo, err error) {
 	params := url.Values{}
 	params.Add("topic_name", topicName)
 	url := fmt.Sprintf(topicHistory, offsetDynamicId)
 	url = url + params.Encode()
-	fmt.Println(url)
+	if err = sdk.fastGet(url, &data); err != nil {
+		return nil, err
+	}
+	return data, nil
+}
+
+func (sdk *SDK) Dynamic(dynamicId uint64) (data *DynamicCard, err error) {
+	url :=
fmt.Sprintf(topicHistory, dynamicId) if err = sdk.fastGet(url, &data); err != nil { return nil, err } diff --git a/internal/repository/bilibili_picture.go b/internal/repository/bilibili_picture.go index 5d63380..2b083e7 100644 --- a/internal/repository/bilibili_picture.go +++ b/internal/repository/bilibili_picture.go @@ -1,6 +1,8 @@ package repository import ( + "time" + "git.vtb.link/eoefans/internal/app/api/idl" "gorm.io/gorm" ) @@ -13,48 +15,69 @@ type BilibiliPictureMysqlImpl struct { tx *gorm.DB } -func (impl *BilibiliPictureMysqlImpl) Create(items []*idl.BilibiliPicture) error { +func (impl *BilibiliPictureMysqlImpl) Create(items []*idl.BilibiliDynamic) error { if len(items) == 0 { return nil } - //针对url去重 - return impl.tx.Transaction(func(_tx *gorm.DB) error { - urls := make([]string, 0, len(items)) - for _, v := range items { - urls = append(urls, v.Url) - } - var exist []*idl.BilibiliPicture - err := _tx.Table(idl.BilibiliPicture{}.TableName()).Where("url in (?)", urls).Distinct("url").Find(&exist).Error - if err != nil { - return err - } - filter := make([]*idl.BilibiliPicture, 0) - for i := range items { - find := false - for j := range exist { - if items[i].Url == exist[j].Url { - find = true - break - } - } - if !find { - filter = append(filter, items[i]) - } - } - err = _tx.Table(idl.BilibiliPicture{}.TableName()).Create(&filter).Error - if err != nil { - return err - } + return impl.tx.Table(idl.BilibiliDynamic{}.TableName()).Create(&items).Error +} + +func (impl *BilibiliPictureMysqlImpl) Update(updates map[string]interface{}, dynamicID uint64) error { + if len(updates) == 0 { return nil - }) + } + return impl.tx.Table(idl.BilibiliDynamic{}.TableName()).Where("dynamic_id=?", dynamicID).Updates(updates).Error } func (impl *BilibiliPictureMysqlImpl) FindMaxDynamicID(topicName string) (*uint64, error) { var id uint64 - conn := impl.tx.Table(idl.BilibiliPicture{}.TableName()) + conn := impl.tx.Table(idl.BilibiliDynamic{}.TableName()) err := 
conn.Select("max(dynamic_id) as id").Where("topic_name = ?", topicName).Group("dynamic_id").Scan(&id).Error if err != nil { return nil, err } return &id, nil } + +func (impl *BilibiliPictureMysqlImpl) FindAllByPubDate(from, to time.Time, page, size int64) (list []*idl.BilibiliDynamic, err error) { + err = impl.tx.Table(idl.BilibiliDynamic{}.TableName()). + Where("sent_at >= ? AND sent_at <= ?", from.Unix(), to.Unix()). + Select("dynamic_id"). + Offset(int((page - 1) * size)).Limit(int(size)). + Order("sent_at DESC"). + Find(&list).Error + if err != nil { + return nil, err + } + return list, nil +} + +func (impl *BilibiliPictureMysqlImpl) Latest(page, size, topicID int) (list []*idl.BilibiliDynamic, err error) { + conn := impl.tx.Table(idl.BilibiliDynamic{}.TableName()) + if topicID != 0 { + conn = conn.Where("topic_id = ?", topicID) + } + err = conn.Select("dynamic_id,pictures,sent_at"). + Order("sent_at DESC"). + Offset((page - 1) * size). + Limit(size).Find(&list).Error + if err != nil { + return nil, err + } + return list, nil +} + +func (impl *BilibiliPictureMysqlImpl) Recommend(from, to time.Time, size, topicID int) (list []*idl.BilibiliDynamic, err error) { + conn := impl.tx.Table(idl.BilibiliDynamic{}.TableName()) + if topicID != 0 { + conn = conn.Where("topic_id = ?", topicID) + } + err = conn.Select("dynamic_id,pictures,sent_at"). + Where("sent_at >= ? AND sent_at <= ?", from.Unix(), to.Unix()). + Order("favor DESC"). 
+ Limit(size).Find(&list).Error + if err != nil { + return nil, err + } + return list, nil +} From b40fd7ead412ca6726b9aa103b704e23bb323558 Mon Sep 17 00:00:00 2001 From: bianjiajie Date: Mon, 23 Jan 2023 14:14:47 +0800 Subject: [PATCH 5/6] =?UTF-8?q?[feature]=20=E8=B0=83=E6=95=B4=E5=9B=BE?= =?UTF-8?q?=E7=89=87=E6=8E=A8=E8=8D=90=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/app/api/handler/bilbil_picture.go | 7 +- internal/app/api/idl/bilibili_picture.go | 7 +- internal/app/api/service/bilbil_picture.go | 5 +- internal/app/spider/picture.go | 5 +- internal/app/spider/picture_test.go | 75 ++++++++++++++++++++++ internal/app/spider/update_dynamic.go | 10 +-- internal/pkg/bilibili/api_test.go | 30 +++++++++ internal/pkg/bilibili/video.go | 16 +++-- internal/repository/bilibili_picture.go | 13 +++- 9 files changed, 145 insertions(+), 23 deletions(-) create mode 100644 internal/app/spider/picture_test.go create mode 100644 internal/pkg/bilibili/api_test.go diff --git a/internal/app/api/handler/bilbil_picture.go b/internal/app/api/handler/bilbil_picture.go index 7649a37..73aecd1 100644 --- a/internal/app/api/handler/bilbil_picture.go +++ b/internal/app/api/handler/bilbil_picture.go @@ -17,7 +17,6 @@ func BilibiliLatestPics(s *service.BilbilPicture) func(ctx *gin.Context) { _ = ctx.Error(apperrors.NewValidationError(400, err.Error()).Wrap(err)) return } - if resp, err := s.Latest(ctx, req); err != nil { _ = ctx.Error(err) return @@ -30,7 +29,11 @@ func BilibiliLatestPics(s *service.BilbilPicture) func(ctx *gin.Context) { func BilibiliRecommendPics(s *service.BilbilPicture) func(ctx *gin.Context) { return func(ctx *gin.Context) { var req idl.BilibiliPictureRecommendReq - if resp, err := s.Recommend(ctx,req); err != nil { + if err := ctx.ShouldBindQuery(&req); err != nil { + _ = ctx.Error(apperrors.NewValidationError(400, err.Error()).Wrap(err)) + return + } + if resp, err := s.Recommend(ctx, req); 
err != nil { _ = ctx.Error(err) return } else { diff --git a/internal/app/api/idl/bilibili_picture.go b/internal/app/api/idl/bilibili_picture.go index 52d09da..8394b1a 100644 --- a/internal/app/api/idl/bilibili_picture.go +++ b/internal/app/api/idl/bilibili_picture.go @@ -48,6 +48,7 @@ type BilibiliPictureLatestReq struct { } type BilibiliPictureRecommendReq struct { + Page int `form:"page,default=1" binding:"omitempty,gt=0"` TopicID int `form:"topic_id"` } type BilibiliPicturesCommonResp struct { @@ -61,6 +62,7 @@ type BilibiliPicturesLatestResp struct { type BilibiliPicturesRecommendResp struct { BilibiliPicturesCommonResp + Page int `json:"page"` Total int `json:"total"` } type BilibiliDynamicDTO struct { @@ -74,7 +76,7 @@ type BilibiliDynamicPicture struct { Height float64 `json:"img_height"` Size float64 `json:"img_size"` Width float64 `json:"img_width"` - ImgSrc string `json:"image_src"` + ImgSrc string `json:"img_src"` } func (BilibiliDynamic) TableName() string { @@ -87,6 +89,5 @@ type BilibiliPictureRepository interface { Update(updates map[string]interface{}, dynamicID uint64) error FindAllByPubDate(from, to time.Time, page, size int64) (list []*BilibiliDynamic, err error) Latest(page, size, topicID int) (list []*BilibiliDynamic, err error) - //推荐暂时先默认50个 - Recommend(from, to time.Time, size, topicID int) (list []*BilibiliDynamic, err error) + Recommend(from, to time.Time, page, size, topicID int) (list []*BilibiliDynamic, err error) } diff --git a/internal/app/api/service/bilbil_picture.go b/internal/app/api/service/bilbil_picture.go index d460ce3..e788f85 100644 --- a/internal/app/api/service/bilbil_picture.go +++ b/internal/app/api/service/bilbil_picture.go @@ -10,7 +10,7 @@ import ( ) const ( - picRecommendDefaultSize = 50 + picRecommendDefaultSize = 20 ) type BilbilPicture struct { @@ -46,12 +46,13 @@ func (service *BilbilPicture) Recommend(ctx context.Context, req idl.BilibiliPic tx := service.db.WithContext(ctx) picRepository := 
repository.NewBilibiliPicture(tx) now := time.Now() - list, err := picRepository.Recommend(now.Add(-(3 * 24 * time.Hour)), now, picRecommendDefaultSize, req.TopicID) + list, err := picRepository.Recommend(now.Add(-(3 * 24 * time.Hour)), now, req.Page, picRecommendDefaultSize, req.TopicID) if err != nil { return nil, err } resp := idl.BilibiliPicturesRecommendResp{ Total: len(list), + Page: req.Page, } for i := range list { resp.Result = append(resp.Result, &idl.BilibiliDynamicDTO{ diff --git a/internal/app/spider/picture.go b/internal/app/spider/picture.go index 30ca805..9967aba 100644 --- a/internal/app/spider/picture.go +++ b/internal/app/spider/picture.go @@ -119,15 +119,14 @@ func (p *Picture) spider() error { for _, v := range data.Cards { switch v.Desc.Type { case bilibili.DynamicDraw: - dynamicID, _ := strconv.ParseUint(v.Desc.DynamicID, 10, 64) - if dynamicID <= *curMaxDynamicID { + if v.Desc.DynamicID <= *curMaxDynamicID { //后面所有的都是爬过的,提前结束,后续也不再请求api exist = true break } dynamic := &idl.BilibiliDynamic{ UID: v.Desc.UID, - DynamicID: dynamicID, + DynamicID: v.Desc.DynamicID, TopicName: topicName, TopicID: topicID, View: v.Desc.View, diff --git a/internal/app/spider/picture_test.go b/internal/app/spider/picture_test.go new file mode 100644 index 0000000..29cf4a6 --- /dev/null +++ b/internal/app/spider/picture_test.go @@ -0,0 +1,75 @@ +package spider + +import ( + "testing" + + "git.vtb.link/eoefans/internal/app/api/idl" + "git.vtb.link/eoefans/internal/pkg/bilibili" + "git.vtb.link/eoefans/internal/pkg/database" + "git.vtb.link/eoefans/internal/repository" + "go.uber.org/zap" +) + +func TestFindCurMaxDynamicIDByTopicName(t *testing.T) { + db, err := database.NewDatabase(&database.Options{ + Type: "mysql", + DSN: "root:123456@tcp(127.0.0.1:3306)/eoes?charset=utf8mb4&parseTime=true&loc=Asia%2FShanghai", + Debug: true, + SetMaxIdleConns: 2, + SetMaxOpenConns: 4, + SetConnMaxLifetime: 6, + }) + if err != nil { + t.Error(err) + return + } + id, err := 
repository.NewBilibiliPicture(db).FindMaxDynamicID("EOE的魔法盒") + if err != nil { + t.Error(err) + return + } + if id != nil { + t.Log(*id) + } +} +func TestInsertPicture(t *testing.T) { + db, err := database.NewDatabase(&database.Options{ + Type: "mysql", + DSN: "root:123456@tcp(127.0.0.1:3306)/eoes?charset=utf8mb4&parseTime=true&loc=Asia%2FShanghai", + Debug: true, + SetMaxIdleConns: 2, + SetMaxOpenConns: 4, + SetConnMaxLifetime: 6, + }) + if err != nil { + t.Error(err) + return + } + sdk := bilibili.NewSDK(&zap.Logger{}) + var dynamicID uint64 = 752843351274815520 + res, err := sdk.Dynamic(uint64(dynamicID)) + if err != nil { + t.Error(err) + return + } + pictures, err := parsePicturesFromCard(res.Card.Card) + if err != nil { + t.Error(err) + return + } + items := []*idl.BilibiliDynamic{{ + UID: res.Card.Desc.UID, + DynamicID: 752843351274815520, + Pictures: pictures, + View: res.Card.Desc.View, + Repost: res.Card.Desc.Repost, + Comment: res.Card.Desc.Comment, + Like: res.Card.Desc.Like, + SentAt: res.Card.Desc.TimeStamp, + }} + err = repository.NewBilibiliPicture(db).Create(items) + if err != nil { + t.Error(err) + return + } +} diff --git a/internal/app/spider/update_dynamic.go b/internal/app/spider/update_dynamic.go index 76986ab..be6b572 100644 --- a/internal/app/spider/update_dynamic.go +++ b/internal/app/spider/update_dynamic.go @@ -80,16 +80,16 @@ func (u *UpdateDynamic) spider() error { return nil } for _, v := range list { - dynamicCard, err := u.sdk.Dynamic(v.DynamicID) + dynamic, err := u.sdk.Dynamic(v.DynamicID) if err != nil { u.logger.Error("Dynamic error", zap.Int("dynamic_id", int(v.DynamicID)), zap.Error(err)) continue } updates := map[string]interface{}{ - "view": dynamicCard.Desc.View, - "repost": dynamicCard.Desc.Repost, - "comment": dynamicCard.Desc.Comment, - "like": dynamicCard.Desc.Like, + "view": dynamic.Card.Desc.View, + "repost": dynamic.Card.Desc.Repost, + "comment": dynamic.Card.Desc.Comment, + "like": dynamic.Card.Desc.Like, } if err 
:= repo.Update(updates, v.DynamicID); err != nil { u.logger.Error("Dynamic Update error", zap.Int("dynamic_id", int(v.DynamicID)), zap.Error(err)) diff --git a/internal/pkg/bilibili/api_test.go b/internal/pkg/bilibili/api_test.go new file mode 100644 index 0000000..7ba11d2 --- /dev/null +++ b/internal/pkg/bilibili/api_test.go @@ -0,0 +1,30 @@ +package bilibili + +import ( + "testing" + + "go.uber.org/zap" +) + +func TestDynamic(t *testing.T) { + sdk := NewSDK(&zap.Logger{}) + var dynamicID uint64 = 752843351274815520 + res, err := sdk.Dynamic(uint64(dynamicID)) + if err != nil { + t.Error(err) + return + } + t.Log(res) +} + +func TestDynamicList(t *testing.T) { + sdk := NewSDK(&zap.Logger{}) + name := "EOE的魔法盒" + offset := 0 + res, err := sdk.TopicDynamics(name, uint64(offset)) + if err != nil { + t.Error(err) + return + } + t.Log(len(res.Cards)) +} diff --git a/internal/pkg/bilibili/video.go b/internal/pkg/bilibili/video.go index 8d8b3b8..1abf454 100644 --- a/internal/pkg/bilibili/video.go +++ b/internal/pkg/bilibili/video.go @@ -60,12 +60,15 @@ type ResponseBasic struct { Data interface{} `json:"data"` } -type DynamicInfo struct { +type DynamicList struct { Cards []DynamicCard `json:"cards"` HasMore uint `json:"has_more"` Offset string `json:"offset"` } +type Dynamic struct { + Card DynamicCard `json:"card"` +} type DynamicCard struct { Desc struct { Type DynamicType `json:"type"` @@ -74,7 +77,7 @@ type DynamicCard struct { Comment uint64 `json:"comment"` Like uint64 `json:"like"` UID uint64 `json:"uid"` - DynamicID string `json:"dynamic_id"` + DynamicID uint64 `json:"dynamic_id"` TimeStamp uint64 `json:"timestamp"` } `json:"desc"` Card string `json:"card"` @@ -92,7 +95,7 @@ type DynamicPicture struct { Height float64 `json:"img_height"` Size float64 `json:"img_size"` Width float64 `json:"img_width"` - ImgSrc string `json:"image_src"` + ImgSrc string `json:"img_src"` } type VideoSearchInfo struct { Type string `json:"type"` @@ -350,7 +353,7 @@ func (sdk *SDK) 
VideoWebTagInfo(aid string) (data *VideoTagResponse, err error) return data, nil } -func (sdk *SDK) TopicDynamics(topicName string, offsetDynamicId uint64) (data *DynamicInfo, err error) { +func (sdk *SDK) TopicDynamics(topicName string, offsetDynamicId uint64) (data *DynamicList, err error) { params := url.Values{} params.Add("topic_name", topicName) url := fmt.Sprintf(topicHistory, offsetDynamicId) @@ -361,8 +364,9 @@ func (sdk *SDK) TopicDynamics(topicName string, offsetDynamicId uint64) (data *D return data, nil } -func (sdk *SDK) Dynamic(dynamicId uint64) (data *DynamicCard, err error) { - url := fmt.Sprintf(topicHistory, dynamicId) +func (sdk *SDK) Dynamic(dynamicId uint64) (data *Dynamic, err error) { + url := fmt.Sprintf(dynamic, dynamicId) + fmt.Println(url) if err = sdk.fastGet(url, &data); err != nil { return nil, err } diff --git a/internal/repository/bilibili_picture.go b/internal/repository/bilibili_picture.go index 2b083e7..e526771 100644 --- a/internal/repository/bilibili_picture.go +++ b/internal/repository/bilibili_picture.go @@ -57,9 +57,13 @@ func (impl *BilibiliPictureMysqlImpl) Latest(page, size, topicID int) (list []*i if topicID != 0 { conn = conn.Where("topic_id = ?", topicID) } + offset := (page - 1) * size + if offset < 0 { + offset = -1 + } err = conn.Select("dynamic_id,pictures,sent_at"). Order("sent_at DESC"). - Offset((page - 1) * size). + Offset(offset). 
Limit(size).Find(&list).Error if err != nil { return nil, err @@ -67,14 +71,19 @@ func (impl *BilibiliPictureMysqlImpl) Latest(page, size, topicID int) (list []*i return list, nil } -func (impl *BilibiliPictureMysqlImpl) Recommend(from, to time.Time, size, topicID int) (list []*idl.BilibiliDynamic, err error) { +func (impl *BilibiliPictureMysqlImpl) Recommend(from, to time.Time, page, size, topicID int) (list []*idl.BilibiliDynamic, err error) { conn := impl.tx.Table(idl.BilibiliDynamic{}.TableName()) if topicID != 0 { conn = conn.Where("topic_id = ?", topicID) } + offset := (page - 1) * size + if offset < 0 { + offset = -1 + } err = conn.Select("dynamic_id,pictures,sent_at"). Where("sent_at >= ? AND sent_at <= ?", from.Unix(), to.Unix()). Order("favor DESC"). + Offset(offset). Limit(size).Find(&list).Error if err != nil { return nil, err From dcc2ee21951d8a81241bd6573b55b1c82945e9a4 Mon Sep 17 00:00:00 2001 From: bianjiajie Date: Mon, 23 Jan 2023 14:25:28 +0800 Subject: [PATCH 6/6] =?UTF-8?q?[fix]=20=E4=BF=AE=E5=A4=8D=E6=9B=B4?= =?UTF-8?q?=E6=96=B0=E6=97=B6=E9=94=99=E8=AF=AF=E7=9A=84=E5=88=97=E5=90=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/app/spider/update_dynamic.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/internal/app/spider/update_dynamic.go b/internal/app/spider/update_dynamic.go index be6b572..d01369b 100644 --- a/internal/app/spider/update_dynamic.go +++ b/internal/app/spider/update_dynamic.go @@ -86,10 +86,10 @@ func (u *UpdateDynamic) spider() error { continue } updates := map[string]interface{}{ - "view": dynamic.Card.Desc.View, - "repost": dynamic.Card.Desc.Repost, - "comment": dynamic.Card.Desc.Comment, - "like": dynamic.Card.Desc.Like, + "view_nums": dynamic.Card.Desc.View, + "repost": dynamic.Card.Desc.Repost, + "comment_nums": dynamic.Card.Desc.Comment, + "favor": dynamic.Card.Desc.Like, } if err := repo.Update(updates, v.DynamicID); err != nil { 
u.logger.Error("Dynamic Update error", zap.Int("dynamic_id", int(v.DynamicID)), zap.Error(err))