83 lines
3.7 KiB
Go
83 lines
3.7 KiB
Go
package repository
|
|
|
|
import (
	"strings"

	"github.com/zs/InsightReply/internal/model"
	"gorm.io/gorm"
	"gorm.io/gorm/clause"
)
|
|
|
|
type TweetRepository struct {
|
|
db *gorm.DB
|
|
}
|
|
|
|
func NewTweetRepository(db *gorm.DB) *TweetRepository {
|
|
return &TweetRepository{db: db}
|
|
}
|
|
|
|
// Upsert intelligently inserts a new tweet or updates an existing one.
|
|
// Crucially, on conflict, it dynamically calculates the 'heat_score' by
|
|
// comparing the new metrics against the old metrics currently in the database.
|
|
func (r *TweetRepository) Upsert(tweet *model.Tweet) error {
|
|
// For new tweets being inserted, their base heat score evaluates to their current absolute metrics.
|
|
// For existing tweets, we calculate the delta and add it to their existing heat score.
|
|
tweet.HeatScore = float64(tweet.LikeCount*1 + tweet.RetweetCount*2 + tweet.ReplyCount*3)
|
|
|
|
err := r.db.Clauses(clause.OnConflict{
|
|
Columns: []clause.Column{{Name: "x_tweet_id"}},
|
|
DoUpdates: clause.Assignments(map[string]interface{}{
|
|
"author_id": clause.Column{Table: "excluded", Name: "author_id"},
|
|
"author_handle": clause.Column{Table: "excluded", Name: "author_handle"},
|
|
"content": clause.Column{Table: "excluded", Name: "content"},
|
|
"posted_at": clause.Column{Table: "excluded", Name: "posted_at"},
|
|
"last_crawled_at": clause.Column{Table: "excluded", Name: "last_crawled_at"},
|
|
"like_count": clause.Column{Table: "excluded", Name: "like_count"},
|
|
"retweet_count": clause.Column{Table: "excluded", Name: "retweet_count"},
|
|
"reply_count": clause.Column{Table: "excluded", Name: "reply_count"},
|
|
// Calculate delta only if the old values exist and are lower than the new values (to prevent negative spikes from X UI glitches).
|
|
// heatTracker = old.heat_score + MAX(0, new.like - old.like)*1 + MAX(0, new.rt - old.rt)*2 + MAX(0, new.reply - old.reply)*3
|
|
"heat_score": gorm.Expr("tweets.heat_score + GREATEST(0, EXCLUDED.like_count - tweets.like_count) * 1.0 + GREATEST(0, EXCLUDED.retweet_count - tweets.retweet_count) * 2.0 + GREATEST(0, EXCLUDED.reply_count - tweets.reply_count) * 3.0"),
|
|
|
|
// Smart Crawling logic: If heat score breaches threshold (e.g. 50), promote to high. If old & cold, demote.
|
|
"crawl_queue": gorm.Expr(`
|
|
CASE
|
|
WHEN tweets.heat_score + GREATEST(0, EXCLUDED.like_count - tweets.like_count) * 1.0 + GREATEST(0, EXCLUDED.retweet_count - tweets.retweet_count) * 2.0 + GREATEST(0, EXCLUDED.reply_count - tweets.reply_count) * 3.0 > 50 THEN 'high'
|
|
WHEN EXCLUDED.last_crawled_at - tweets.posted_at > INTERVAL '7 days' THEN 'low'
|
|
ELSE 'normal'
|
|
END
|
|
`),
|
|
}),
|
|
}).Create(tweet).Error
|
|
|
|
return err
|
|
}
|
|
|
|
// GetTopHeatingTweets returns unprocessed tweets ordered by their generated heat score
|
|
func (r *TweetRepository) GetTopHeatingTweets(limit int) ([]model.Tweet, error) {
|
|
var tweets []model.Tweet
|
|
err := r.db.Where("is_processed = ?", false).Order("heat_score desc").Limit(limit).Find(&tweets).Error
|
|
return tweets, err
|
|
}
|
|
|
|
// MarkAsProcessed tags a tweet so we don't present it to the user repeatedly
|
|
func (r *TweetRepository) MarkAsProcessed(id string) error {
|
|
return r.db.Model(&model.Tweet{}).Where("id = ?", id).Update("is_processed", true).Error
|
|
}
|
|
|
|
// SearchTweets allows dynamic multi-rule filtering
|
|
func (r *TweetRepository) SearchTweets(keyword, handle string, limit int) ([]model.Tweet, error) {
|
|
var tweets []model.Tweet
|
|
query := r.db.Model(&model.Tweet{})
|
|
|
|
if keyword != "" {
|
|
// PostgreSQL ILIKE for case-insensitive keyword searching
|
|
query = query.Where("content ILIKE ?", "%"+keyword+"%")
|
|
}
|
|
|
|
if handle != "" {
|
|
query = query.Where("author_handle = ?", handle)
|
|
}
|
|
|
|
err := query.Order("heat_score desc, posted_at desc").Limit(limit).Find(&tweets).Error
|
|
return tweets, err
|
|
}
|