diff --git a/database/init.sql b/database/init.sql index 74a9bf1..0dfbe7e 100644 --- a/database/init.sql +++ b/database/init.sql @@ -130,4 +130,21 @@ create table bilibili_dynamics ( alter table bilibili_dynamics add column topic_details json default null comment '动态的#xxx#' after pictures; alter table bilibili_dynamics add column feedback int default 0 comment '反馈类型' after dynamic_id; -alter table bilibili_dynamics add column verify boolean default false comment '是否审核过' after feedback; \ No newline at end of file +alter table bilibili_dynamics add column verify boolean default false comment '是否审核过' after feedback; +alter table bilibili_dynamics add column pictures_num int default 0 comment '图片数量' after pictures; +alter table bilibili_dynamics add column actual_topic_id bigint default 0 comment '实际的话题id' after topic_id; +alter table bilibili_dynamics add column cp_topic_id bigint default 0 comment 'cp的话题id' after topic_id; + +create table bilibili_pictures ( + id bigint unsigned not null auto_increment primary key comment 'id', + img_src varchar(255) not null comment '图片链接', + img_attr json not null comment '图片属性', + dynamic_id bigint unsigned not null comment 'B站动态id', + feedback int default 0 comment '反馈类型', + verify boolean default false comment '是否审核过', + created_at bigint unsigned not null comment '创建时间', + updated_at bigint unsigned not null comment '更新时间', + deleted_at datetime null comment '删除时间', + index idx_dynamic_id(dynamic_id) comment '动态id索引', + index idx_img_src(img_src) comment '图片链接索引' +)Engine=InnoDB comment '动态' charset 'utf8mb4'; \ No newline at end of file diff --git a/internal/app/api/idl/bilibili_picture.go b/internal/app/api/idl/bilibili_picture.go index 990aba9..7fd5b17 100644 --- a/internal/app/api/idl/bilibili_picture.go +++ b/internal/app/api/idl/bilibili_picture.go @@ -5,6 +5,8 @@ import ( "encoding/json" "errors" "time" + + "gorm.io/gorm" ) type DynamicFeedback int @@ -25,24 +27,60 @@ var DynamicFeedbackMap = map[DynamicFeedback]struct{}{ Uncomfortable: {}, } +// 图片库 +type BilibiliPicture struct { + ID uint64 `gorm:"primarykey"` + DynamicID uint64 `gorm:"column:dynamic_id"` + ImgSrc string `gorm:"column:img_src"` + ImgAttr BilibiliPictureAttr `gorm:"column:img_attr"` + Feedback DynamicFeedback `gorm:"column:feedback"` + Verify bool `gorm:"column:verify"` + CreatedAt uint64 `gorm:"autoCreateTime"` + UpdatedAt uint64 `gorm:"autoUpdateTime"` + DeletedAt gorm.DeletedAt `gorm:"index"` + Dynamic *BilibiliDynamic `gorm:"foreignKey:DynamicID;references:DynamicID"` +} + // 图片来源于动态,以动态为单位 type BilibiliDynamic struct { - ID uint64 `gorm:"primarykey"` - UID uint64 `gorm:"column:uid"` - DynamicID uint64 `gorm:"column:dynamic_id"` - Feedback DynamicFeedback `gorm:"column:feedback"` - Verify bool `gorm:"column:verify"` - Pictures BilibiliDynamicPictures `gorm:"column:pictures"` - TopicDetails *BilibiliDynamicTopicDetails `gorm:"topic_details"` - TopicName string `gorm:"column:topic_name"` - TopicID uint64 `gorm:"column:topic_id"` - View uint64 `gorm:"column:view_nums"` - Repost uint64 `gorm:"column:repost"` - Comment uint64 `gorm:"column:comment_nums"` - Like uint64 `gorm:"column:favor"` - SentAt uint64 `gorm:"column:sent_at"` - CreatedAt uint64 `gorm:"autoCreateTime"` - UpdatedAt uint64 `gorm:"autoUpdateTime"` + ID uint64 `gorm:"primarykey"` + UID uint64 `gorm:"column:uid"` + DynamicID uint64 `gorm:"column:dynamic_id"` + Feedback DynamicFeedback `gorm:"column:feedback"` + Verify bool `gorm:"column:verify"` + Pictures BilibiliDynamicPictures `gorm:"column:pictures"` + PicturesNum int `gorm:"column:pictures_num"` + TopicDetails *BilibiliDynamicTopicDetails `gorm:"topic_details"` + TopicName string `gorm:"column:topic_name"` + TopicID uint64 `gorm:"column:topic_id"` + ActualTopicID uint64 `gorm:"column:actual_topic_id"` + CPTopicID uint64 `gorm:"column:cp_topic_id"` + View uint64 `gorm:"column:view_nums"` + Repost uint64 `gorm:"column:repost"` + Comment uint64 `gorm:"column:comment_nums"` + Like uint64 `gorm:"column:favor"` + SentAt uint64 `gorm:"column:sent_at"` + CreatedAt uint64 `gorm:"autoCreateTime"` + UpdatedAt uint64 `gorm:"autoUpdateTime"` + Pics []BilibiliPicture `gorm:"foreignKey:DynamicID;references:DynamicID"` +} + +func (p BilibiliPictureAttr) Value() (driver.Value, error) { + return json.Marshal(p) +} + +func (c *BilibiliPictureAttr) Scan(input interface{}) error { + data, ok := input.([]byte) + if !ok { + return errors.New("invalid input in Scan") + } + result := BilibiliPictureAttr{} + err := json.Unmarshal(data, &result) + if err != nil { + return err + } + *c = result + return nil } func (p BilibiliDynamicPictures) Value() (driver.Value, error) { @@ -139,10 +177,14 @@ type BilibiliRandomPictureTag struct { } type BilibiliDynamicPictures []BilibiliDynamicPicture type BilibiliDynamicPicture struct { + BilibiliPictureAttr + ImgSrc string `json:"img_src"` +} + +type BilibiliPictureAttr struct { Height float64 `json:"img_height"` Size float64 `json:"img_size"` Width float64 `json:"img_width"` - ImgSrc string `json:"img_src"` } type BilibiliDynamicTopicDetails []BilibiliDynamicTopicDetail @@ -155,6 +197,10 @@ func (BilibiliDynamic) TableName() string { return "bilibili_dynamics" } +func (BilibiliPicture) TableName() string { + return "bilibili_pictures" +} + type BilibiliPictureRepository interface { Create(items []*BilibiliDynamic) error FindMaxDynamicID(topicName string) (*uint64, error) diff --git a/internal/app/api/service/bilbil_picture.go b/internal/app/api/service/bilbil_picture.go index 6549795..021048a 100644 --- a/internal/app/api/service/bilbil_picture.go +++ b/internal/app/api/service/bilbil_picture.go @@ -139,7 +139,7 @@ func (service *BilbilPicture) Recommend(ctx context.Context, req idl.BilibiliPic tx := service.db.WithContext(ctx) picRepository := repository.NewBilibiliPicture(tx) now := time.Now() - list, err := picRepository.Recommend(now.Add(-(30 * 24 * time.Hour)), now, req.Page, picRecommendDefaultSize, req.TopicID) + list, err := picRepository.Recommend(now.Add(-(15 * 24 * time.Hour)), now, req.Page, picRecommendDefaultSize, req.TopicID) if err != nil { return nil, err } diff --git a/internal/app/spider/picture.go b/internal/app/spider/picture.go index 2e015e9..ec2e001 100644 --- a/internal/app/spider/picture.go +++ b/internal/app/spider/picture.go @@ -82,100 +82,116 @@ func (p *Picture) Run(ctx context.Context) error { } func (p *Picture) spider() error { - //把当前数据库最大的动态ID查出来 - //调用接口,将大于当前动态ID的都入DB,如果存在小于的,则可提前结束,尽量保证没有重复数据 - topicsMap := map[string]uint64{ - bilibili.TopicNameGoGo: bilibili.TopicIDGoGo, - bilibili.TopicNameMino: bilibili.TopicIDMino, - bilibili.TopicNameUn: bilibili.TopicIDUn, - bilibili.TopicNameMoMo: bilibili.TopicIDMoMo, - bilibili.TopicNameWan: bilibili.TopicIDWan, - bilibili.TopicNameEOE: bilibili.TopicIDEOE, + topicsMap := map[uint64][]uint64{ + bilibili.TopicIDGoGo: {bilibili.TopicIDGoGo, 28039056, 28039057, 28621067, 30029596, 30387922}, + bilibili.TopicIDMino: {bilibili.TopicIDMino, 28045077, 28611311, 31329504, 28235940, 28197598}, + bilibili.TopicIDUn: {bilibili.TopicIDUn, 28187701}, + bilibili.TopicIDMoMo: {bilibili.TopicIDMoMo, 28055152, 28077478, 28298854, 28535695}, + bilibili.TopicIDWan: {bilibili.TopicIDWan, 17283297, 28044522, 28653712, 28909298, 29297260, 31565489}, + bilibili.TopicIDEOE: {bilibili.TopicIDEOE, 28627394}, } black := map[uint64]struct{}{ 383884380: {}, //水图太多,大部分是食物图 } - for topicName, topicID := range topicsMap { - curMaxDynamicID, err := repository.NewBilibiliPicture(p.db).FindMaxDynamicID(topicName) + cpMap := map[uint64]uint64{ + 28909298: bilibili.TopicIDGoGo, + 29297260: bilibili.TopicIDMino, + 31565489: bilibili.TopicIDMino, + 30387922: bilibili.TopicIDMoMo, + 28235940: bilibili.TopicIDUn, + 28535695: bilibili.TopicIDUn, + 28197598: bilibili.TopicIDMoMo, + } + standard := time.Now().Add(-(time.Hour * 24)).Unix() + for topicID, vec := range topicsMap { + for _, actualTopID := range vec { + var cpTopicID uint64 + res, ok := cpMap[actualTopID] + if ok { + cpTopicID = res + } + p.get(topicID, actualTopID, cpTopicID, uint64(standard), black) + } + } + return nil +} + +func (p *Picture) get(topicID, actualTopID, cpTopicID, standard uint64, black map[uint64]struct{}) { + var hasMore uint = 1 + var offset uint64 = 0 + exist := false //判断有没有已经爬过 + for hasMore == 1 && !exist { + time.Sleep(400 * time.Millisecond) + data, err := p.sdk.TopicDynamics(actualTopID, offset) if err != nil { - p.logger.Error("FindMaxDynamicID error", zap.String("topic_name", topicName), zap.Error(err)) - continue + p.logger.Error("TopicDynamics error", zap.Uint64("topic_id", actualTopID), zap.String("offset", fmt.Sprintf("%d", offset)), zap.Error(err)) + time.Sleep(500 * time.Millisecond) + break } - var hasMore uint = 1 - var offset uint64 = 0 - exist := false //判断有没有已经爬过 - for hasMore == 1 && !exist { - time.Sleep(400 * time.Millisecond) - data, err := p.sdk.TopicDynamics(topicName, offset) - if err != nil { - p.logger.Error("TopicDynamics error", zap.String("topic_name", topicName), zap.String("offset", fmt.Sprintf("%d", offset)), zap.Error(err)) - time.Sleep(500 * time.Millisecond) - break - } - hasMore = data.HasMore - dynamicID, err := strconv.ParseUint(data.Offset, 10, 64) - if err == nil { - offset = dynamicID - } else { - hasMore = 0 - } - items := make([]*idl.BilibiliDynamic, 0) - for _, v := range data.Cards { - switch v.Desc.Type { - case bilibili.DynamicDraw: - if v.Desc.DynamicID <= *curMaxDynamicID { - //后面所有的都是爬过的,提前结束,后续也不再请求api - exist = true - break - } - if _, ok := black[v.Desc.UID]; ok { - continue - } - dynamic := &idl.BilibiliDynamic{ - UID: v.Desc.UID, - DynamicID: v.Desc.DynamicID, - TopicName: topicName, - TopicID: topicID, - View: v.Desc.View, - Repost: v.Desc.Repost, - Comment: v.Desc.Comment, - Like: v.Desc.Like, - SentAt: v.Desc.TimeStamp, - } - pictures, err := parsePicturesFromCard(v.Card) - if err != nil { - p.logger.Error("ParsePicturesFromCard error", zap.String("topic_name", topicName), zap.String("offset", fmt.Sprintf("%d", offset)), zap.Error(err)) - continue - } - if len(pictures) == 0 { - continue - } - dynamic.Pictures = pictures - topicDetails := make(idl.BilibiliDynamicTopicDetails, 0) - for _, v := range v.Display.TopicInfo.TopicDetails { - topicDetails = append(topicDetails, idl.BilibiliDynamicTopicDetail{ - TopicID: v.TopicID, - TopicName: v.TopicName, - }) - } - if len(topicDetails) != 0 { - dynamic.TopicDetails = &topicDetails - } - items = append(items, dynamic) - default: + hasMore = data.HasMore + dynamicID, err := strconv.ParseUint(data.Offset, 10, 64) + if err == nil { + offset = dynamicID + } else { + hasMore = 0 + } + items := make([]*idl.BilibiliDynamic, 0) + for _, v := range data.Cards { + switch v.Desc.Type { + case bilibili.DynamicDraw: + if v.Desc.TimeStamp <= uint64(standard) { + //后面所有的都是爬过的,提前结束,后续也不再请求api + exist = true + break + } + if _, ok := black[v.Desc.UID]; ok { continue } - } - //插入数据 - if len(items) != 0 { - err := repository.NewBilibiliPicture(p.db).Create(items) + dynamic := &idl.BilibiliDynamic{ + UID: v.Desc.UID, + DynamicID: v.Desc.DynamicID, + TopicID: topicID, + ActualTopicID: actualTopID, + CPTopicID: cpTopicID, + View: v.Desc.View, + Repost: v.Desc.Repost, + Comment: v.Desc.Comment, + Like: v.Desc.Like, + SentAt: v.Desc.TimeStamp, + } + pictures, err := parsePicturesFromCard(v.Card) if err != nil { - p.logger.Error("Create bilibli_pictures error", zap.String("topic_name", topicName), zap.String("offset", fmt.Sprintf("%d", offset)), zap.Error(err)) + p.logger.Error("ParsePicturesFromCard error", zap.Uint64("topic_id", actualTopID), zap.String("offset", fmt.Sprintf("%d", offset)), zap.Error(err)) + continue + } + if len(pictures) == 0 { + continue + } + dynamic.Pictures = pictures + dynamic.PicturesNum = len(pictures) + topicDetails := make(idl.BilibiliDynamicTopicDetails, 0) + for _, v := range v.Display.TopicInfo.TopicDetails { + topicDetails = append(topicDetails, idl.BilibiliDynamicTopicDetail{ + TopicID: v.TopicID, + TopicName: v.TopicName, + }) + } + if len(topicDetails) != 0 { + dynamic.TopicDetails = &topicDetails } + items = append(items, dynamic) + default: + continue + } + } + //插入数据 + if len(items) != 0 { + err := repository.NewBilibiliPicture(p.db).Create(items) + if err != nil { + p.logger.Error("Create bilibli_pictures error", zap.Uint64("topic_id", actualTopID), zap.String("offset", fmt.Sprintf("%d", offset)), zap.Error(err)) } } } - return nil } func parsePicturesFromCard(data string) ([]idl.BilibiliDynamicPicture, error) { @@ -185,12 +201,16 @@ func parsePicturesFromCard(data string) ([]idl.BilibiliDynamicPicture, error) { } pics := make([]idl.BilibiliDynamicPicture, 0, len(content.Item.Pictures)) for _, v := range content.Item.Pictures { - pics = append(pics, idl.BilibiliDynamicPicture{ + attr := idl.BilibiliPictureAttr{ Height: v.Height, Size: v.Size, Width: v.Width, + } + p := idl.BilibiliDynamicPicture{ ImgSrc: v.ImgSrc, - }) + } + p.BilibiliPictureAttr = attr + pics = append(pics, p) } return pics, nil } diff --git a/internal/app/spider/update_dynamic.go b/internal/app/spider/update_dynamic.go index b7f7340..6581758 100644 --- a/internal/app/spider/update_dynamic.go +++ b/internal/app/spider/update_dynamic.go @@ -74,7 +74,7 @@ func (u *UpdateDynamic) spider() error { size := 100 for p := 1; true; p++ { - list, err := repo.FindAllByPubDate(time.Now().Add(-(3 * 24 * time.Hour)), time.Now(), int64(p), int64(size)) + list, err := repo.FindAllByPubDate(time.Now().Add(-(15 * 24 * time.Hour)), time.Now(), int64(p), int64(size)) if err != nil { u.logger.Error("[UpdateDynamic spider()]FindAllByPubDate error", zap.Int("page", p), zap.Error(err)) return nil diff --git a/internal/pkg/bilibili/api_test.go b/internal/pkg/bilibili/api_test.go index 7ba11d2..4a1f15d 100644 --- a/internal/pkg/bilibili/api_test.go +++ b/internal/pkg/bilibili/api_test.go @@ -19,9 +19,8 @@ func TestDynamic(t *testing.T) { func TestDynamicList(t *testing.T) { sdk := NewSDK(&zap.Logger{}) - name := "EOE的魔法盒" offset := 0 - res, err := sdk.TopicDynamics(name, uint64(offset)) + res, err := sdk.TopicDynamics(TopicIDEOE, uint64(offset)) if err != nil { t.Error(err) return diff --git a/internal/pkg/bilibili/video.go b/internal/pkg/bilibili/video.go index 7a19699..b0ee89c 100644 --- a/internal/pkg/bilibili/video.go +++ b/internal/pkg/bilibili/video.go @@ -3,7 +3,6 @@ package bilibili import ( "fmt" "net/http" - "net/url" "git.vtb.link/eoefans/internal/pkg/httpclient" "github.com/go-resty/resty/v2" @@ -15,7 +14,7 @@ const ( webVideoSearchURL = "https://api.bilibili.com/x/web-interface/search/type?context=&search_type=video&page=%d&order=pubdate&keyword=%s&duration=0&category_id=&tids_2=&__refresh__=true&_extra=&tids=0&highlight=1&single_column=0" webVideoInfoURL = "https://api.bilibili.com/x/web-interface/view?bvid=%s" webVideoTagInfoURL = "https://api.bilibili.com/x/web-interface/view/detail/tag?aid=%s" - topicHistory = "https://api.vc.bilibili.com/topic_svr/v1/topic_svr/topic_history?offset_dynamic_id=%d&" + topicHistory = "https://api.vc.bilibili.com/topic_svr/v1/topic_svr/topic_history?offset_dynamic_id=%d&topic_id=%d" dynamic = "https://api.vc.bilibili.com/dynamic_svr/v1/dynamic_svr/get_dynamic_detail?dynamic_id=%d" ) @@ -28,6 +27,46 @@ type DynamicType uint const ( DynamicDraw DynamicType = 2 //图片动态 ) + +// #莞儿# 17283297 +// #莞儿睡不醒# 28044522 +// #露早# 28039056 +// #露早GOGO# 28039057 +// #米诺高分少女# 28045077 +// #虞莫# 28055152 +// #虞莫MOMO# 28077478 +// #柚恩# 28055153 +// #柚恩不加糖# 28088188 +// #EOE组合# 28039837 + +// 非官方 +// 莞儿有引力 28653712 +// 露早的汪汪日记 28621067 +// 米诺与小恶魔的低语时刻 28611311 +// 么么莫莫宝 28298854 +// 和柚恩的婚后生活 28187701 +// EOE的魔法森林 28627394 + +// #露早生日快乐# 30029596 +// #米诺生日快乐# 31329504 +// #莞儿生日快乐# +// #柚恩生日快乐# +// #虞莫生日快乐# + +// CP +// 莞儿×露早 #早莞在一起# 28909298 +// 莞儿×柚恩 #莞柚引力# 29297260 +// 莞儿×米诺 #一莞米# 31565489 +// 莞儿×虞莫 #虞舟唱莞# + +// 露早×柚恩 #早柚#、#柚子露# +// 露早×米诺 #西米露# +// 露早×虞莫 #早有虞谋# 30387922 + +// 柚恩×米诺 #米哈柚# 28235940 +// 柚恩×虞莫 #虞香柚丝# 28535695 + +// 米诺×虞莫 #米虞说的道理# 28197598 const ( //topicHistory用topic_id查出来的数据有问题,故暂时用topic_name TopicNameWan = "小莞熊在这里" @@ -364,11 +403,8 @@ func (sdk *SDK) VideoWebTagInfo(aid string) (data *VideoTagResponse, err error) return data, nil } -func (sdk *SDK) TopicDynamics(topicName string, offsetDynamicId uint64) (data *DynamicList, err error) { - params := url.Values{} - params.Add("topic_name", topicName) - url := fmt.Sprintf(topicHistory, offsetDynamicId) - url = url + params.Encode() +func (sdk *SDK) TopicDynamics(topicID uint64, offsetDynamicId uint64) (data *DynamicList, err error) { + url := fmt.Sprintf(topicHistory, offsetDynamicId, topicID) if err = sdk.fastGet(url, &data); err != nil { return nil, err } diff --git a/internal/repository/bilibili_picture.go b/internal/repository/bilibili_picture.go index e8fc036..1439e18 100644 --- a/internal/repository/bilibili_picture.go +++ b/internal/repository/bilibili_picture.go @@ -6,6 +6,7 @@ import ( "git.vtb.link/eoefans/internal/app/api/idl" "gorm.io/gorm" + "gorm.io/gorm/clause" ) func NewBilibiliPicture(tx *gorm.DB) idl.BilibiliPictureRepository { @@ -20,7 +21,34 @@ func (impl *BilibiliPictureMysqlImpl) Create(items []*idl.BilibiliDynamic) error if len(items) == 0 { return nil } - return impl.tx.Table(idl.BilibiliDynamic{}.TableName()).Create(&items).Error + picItems := make([]idl.BilibiliPicture, 0) + for i := range items { + for j := range items[i].Pictures { + imgAttr := idl.BilibiliPictureAttr{ + Height: items[i].Pictures[j].Height, + Size: items[i].Pictures[j].Size, + Width: items[i].Pictures[j].Width, + } + picItems = append(picItems, idl.BilibiliPicture{ + DynamicID: items[i].DynamicID, + ImgSrc: items[i].Pictures[j].ImgSrc, + ImgAttr: imgAttr, + }) + } + } + return impl.tx.Transaction(func(_tx *gorm.DB) error { + err := _tx.Table(idl.BilibiliDynamic{}.TableName()).Clauses(clause.OnConflict{ + Columns: []clause.Column{{Name: "dynamic_id"}}, + DoUpdates: clause.AssignmentColumns([]string{"dynamic_id"}), + }).Create(&items).Error + if err != nil { + return err + } + return _tx.Table(idl.BilibiliPicture{}.TableName()).Clauses(clause.OnConflict{ + Columns: []clause.Column{{Name: "img_src"}}, + DoUpdates: clause.AssignmentColumns([]string{"img_src"}), + }).Create(&picItems).Error + }) } func (impl *BilibiliPictureMysqlImpl) Update(updates map[string]interface{}, dynamicID uint64) error { @@ -56,7 +84,7 @@ func (impl *BilibiliPictureMysqlImpl) FindAllByPubDate(from, to time.Time, page, func (impl *BilibiliPictureMysqlImpl) Latest(page, size, topicID int) (list []*idl.BilibiliDynamic, err error) { conn := impl.tx.Table(idl.BilibiliDynamic{}.TableName()) if topicID != 0 { - conn = conn.Where("topic_id = ?", topicID) + conn = conn.Where("topic_id = ? or cp_topic_id = ?", topicID, topicID) } offset := (page - 1) * size if offset < 0 { @@ -75,7 +103,7 @@ func (impl *BilibiliPictureMysqlImpl) Latest(page, size, topicID int) (list []*i func (impl *BilibiliPictureMysqlImpl) Recommend(from, to time.Time, page, size, topicID int) (list []*idl.BilibiliDynamic, err error) { conn := impl.tx.Table(idl.BilibiliDynamic{}.TableName()) if topicID != 0 { - conn = conn.Where("topic_id = ?", topicID) + conn = conn.Where("topic_id = ? or cp_topic_id = ?", topicID, topicID) } offset := (page - 1) * size if offset < 0 {