feat: 部署初版测试
This commit is contained in:
147
server/internal/worker/monitor_worker.go
Normal file
147
server/internal/worker/monitor_worker.go
Normal file
@@ -0,0 +1,147 @@
|
||||
package worker
|
||||
|
||||
import (
	"context"
	"fmt"
	"log"
	"net/url"
	"time"

	"github.com/zs/InsightReply/internal/model"
	"github.com/zs/InsightReply/internal/repository"
	"github.com/zs/InsightReply/internal/scraper"
)
|
||||
|
||||
// MonitorWorker periodically scrapes competitor/keyword timelines from a
// self-hosted Nitter instance and upserts the extracted tweets into the
// tracking database.
type MonitorWorker struct {
	repo      *repository.CompetitorMonitorRepository // source of active monitor configurations
	tweetRepo *repository.TweetRepository             // destination for scraped tweets
	client    *scraper.ScraperClient                  // HTTP scraper with anti-ban jitter support
	baseUrl   string                                  // Nitter instance base URL (no trailing slash)
}
|
||||
|
||||
func NewMonitorWorker(repo *repository.CompetitorMonitorRepository, tweetRepo *repository.TweetRepository) *MonitorWorker {
|
||||
return &MonitorWorker{
|
||||
repo: repo,
|
||||
tweetRepo: tweetRepo,
|
||||
client: scraper.NewScraperClient(),
|
||||
baseUrl: "https://x.beenglish.eu.org", // Self-hosted Nitter instance
|
||||
}
|
||||
}
|
||||
|
||||
// Start begins the background job loop. This should be run in a goroutine.
|
||||
func (w *MonitorWorker) Start(ctx context.Context, interval time.Duration) {
|
||||
log.Printf("[MonitorWorker] Starting background scraping loop every %v", interval)
|
||||
|
||||
ticker := time.NewTicker(interval)
|
||||
defer ticker.Stop()
|
||||
|
||||
// Initial run
|
||||
w.runCycle(ctx)
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
log.Println("[MonitorWorker] Stopping background scraping loop")
|
||||
return
|
||||
case <-ticker.C:
|
||||
w.runCycle(ctx)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (w *MonitorWorker) runCycle(ctx context.Context) {
|
||||
log.Println("[MonitorWorker] Starting scrape cycle...")
|
||||
|
||||
monitors, err := w.repo.ListAllActive()
|
||||
if err != nil {
|
||||
log.Printf("[MonitorWorker] Error fetching active monitors: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if len(monitors) == 0 {
|
||||
log.Println("[MonitorWorker] No active monitors found. Skipping cycle.")
|
||||
return
|
||||
}
|
||||
|
||||
for _, monitor := range monitors {
|
||||
// Stop processing if context cancelled (e.g., app shutdown)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
// Determine Scraping Strategy
|
||||
var url string
|
||||
|
||||
// URL encode the brand name which acts as our keyword
|
||||
keyword := monitor.BrandName
|
||||
|
||||
if monitor.XHandle != "" {
|
||||
if monitor.XHandle == keyword || keyword == "" {
|
||||
// Standard profile timeline scraping
|
||||
log.Printf("[MonitorWorker] Scraping timeline for account @%s", monitor.XHandle)
|
||||
url = fmt.Sprintf("%s/%s", w.baseUrl, monitor.XHandle)
|
||||
} else {
|
||||
// Combo scraping: Keyword + Specific Account
|
||||
log.Printf("[MonitorWorker] Scraping combo: '%s' from @%s", keyword, monitor.XHandle)
|
||||
url = fmt.Sprintf("%s/search?f=tweets&q=%s+from%%3A%s", w.baseUrl, keyword, monitor.XHandle)
|
||||
}
|
||||
} else if keyword != "" {
|
||||
// Global search for Keyword across X
|
||||
log.Printf("[MonitorWorker] Scraping global search for keyword: '%s'", keyword)
|
||||
url = fmt.Sprintf("%s/search?f=tweets&q=%s", w.baseUrl, keyword)
|
||||
} else {
|
||||
continue // Invalid monitor config
|
||||
}
|
||||
|
||||
w.scrapeAndLog(url)
|
||||
|
||||
// Anti-Ban: Jitter delay between requests (3s to 8s)
|
||||
w.client.JitterDelay(3000, 8000)
|
||||
}
|
||||
|
||||
log.Println("[MonitorWorker] Scrape cycle completed.")
|
||||
}
|
||||
|
||||
func (w *MonitorWorker) scrapeAndLog(url string) {
|
||||
htmlData, err := w.client.Fetch(url)
|
||||
if err != nil {
|
||||
log.Printf("[MonitorWorker] Error scraping %s: %v", url, err)
|
||||
return
|
||||
}
|
||||
|
||||
tweets, err := scraper.ParseTimeline(htmlData)
|
||||
if err != nil {
|
||||
log.Printf("[MonitorWorker] Error parsing HTML for %s: %v", url, err)
|
||||
return
|
||||
}
|
||||
|
||||
log.Printf("[MonitorWorker] Extracted %d tweets from %s", len(tweets), url)
|
||||
|
||||
// Epic 6: Upsert into tracking database
|
||||
upsertCount := 0
|
||||
for _, rawTweet := range tweets {
|
||||
tweet := &model.Tweet{
|
||||
XTweetID: rawTweet.ID,
|
||||
AuthorHandle: rawTweet.Handle,
|
||||
Content: rawTweet.Content,
|
||||
PostedAt: rawTweet.CreatedAt,
|
||||
LikeCount: rawTweet.Likes,
|
||||
RetweetCount: rawTweet.Retweets,
|
||||
ReplyCount: rawTweet.Replies,
|
||||
CrawlQueue: "normal",
|
||||
IsProcessed: false,
|
||||
LastCrawledAt: time.Now(),
|
||||
}
|
||||
|
||||
// Save/Update in DB
|
||||
err := w.tweetRepo.Upsert(tweet)
|
||||
if err != nil {
|
||||
log.Printf("[MonitorWorker] Error UPSERTing tweet %s: %v", tweet.XTweetID, err)
|
||||
} else {
|
||||
upsertCount++
|
||||
}
|
||||
}
|
||||
|
||||
log.Printf("[MonitorWorker] Successfully Upserted %d/%d tweets to the database.", upsertCount, len(tweets))
|
||||
}
|
||||
142
server/internal/worker/performance_worker.go
Normal file
142
server/internal/worker/performance_worker.go
Normal file
@@ -0,0 +1,142 @@
|
||||
package worker
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/zs/InsightReply/internal/model"
|
||||
"github.com/zs/InsightReply/internal/repository"
|
||||
"github.com/zs/InsightReply/internal/scraper"
|
||||
"github.com/zs/InsightReply/internal/service"
|
||||
)
|
||||
|
||||
// PerformanceWorker retroactively checks whether AI-generated replies were
// actually posted on X and, when found, records their engagement metrics.
type PerformanceWorker struct {
	repo    *repository.ReplyRepository // pending-check queue and performance storage
	client  *scraper.ScraperClient      // HTTP scraper with anti-ban jitter support
	aiSvc   *service.AIService          // extracts linguistic style from viral replies
	baseUrl string                      // Nitter instance base URL (no trailing slash)
}
|
||||
|
||||
func NewPerformanceWorker(repo *repository.ReplyRepository, aiSvc *service.AIService) *PerformanceWorker {
|
||||
return &PerformanceWorker{
|
||||
repo: repo,
|
||||
client: scraper.NewScraperClient(),
|
||||
aiSvc: aiSvc,
|
||||
baseUrl: "https://x.beenglish.eu.org",
|
||||
}
|
||||
}
|
||||
|
||||
// Start begins the 24h retroactive performance checking loop
|
||||
func (w *PerformanceWorker) Start(ctx context.Context, interval time.Duration) {
|
||||
log.Printf("[PerformanceWorker] Starting retroactive engagement tracking every %v", interval)
|
||||
|
||||
ticker := time.NewTicker(interval)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
log.Println("[PerformanceWorker] Stopping background performance loop")
|
||||
return
|
||||
case <-ticker.C:
|
||||
w.runCycle(ctx)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (w *PerformanceWorker) runCycle(ctx context.Context) {
|
||||
pending, err := w.repo.GetPendingPerformanceChecks()
|
||||
if err != nil {
|
||||
log.Printf("[PerformanceWorker] Error fetching pending checks: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
for _, reply := range pending {
|
||||
// Stop processing if context cancelled
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
xTweetID, err := w.repo.GetTweetXTweetID(reply.TweetID)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// Scrape the specific thread
|
||||
// Nitter handles /i/status/12345 generic routes
|
||||
url := fmt.Sprintf("%s/i/status/%s", w.baseUrl, xTweetID)
|
||||
log.Printf("[PerformanceWorker] Checking thread %s for user's AI reply performance", url)
|
||||
|
||||
htmlData, err := w.client.Fetch(url)
|
||||
if err != nil {
|
||||
w.client.JitterDelay(2000, 5000)
|
||||
continue
|
||||
}
|
||||
|
||||
threadReplies, err := scraper.ParseTimeline(htmlData)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// Search for the user's generated text within the thread replies
|
||||
found := false
|
||||
for _, threadReply := range threadReplies {
|
||||
// Basic similarity check: if 50% of the AI sentence is present
|
||||
// Real implementation might use Levenshtein distance, but strings.Contains on chunks works for MVP
|
||||
snippet := reply.Content
|
||||
if len(snippet) > 20 {
|
||||
snippet = snippet[:20]
|
||||
}
|
||||
|
||||
if strings.Contains(threadReply.Content, snippet) {
|
||||
found = true
|
||||
|
||||
// WE FOUND OUR REPLY! Record its metrics
|
||||
perf := &model.ReplyPerformance{
|
||||
ReplyID: reply.ID,
|
||||
UserID: reply.UserID,
|
||||
LikeCountIncrease: threadReply.Likes,
|
||||
ReplyCountIncrease: threadReply.Replies,
|
||||
CheckTime: time.Now(),
|
||||
}
|
||||
|
||||
w.repo.SaveReplyPerformance(perf)
|
||||
log.Printf("[PerformanceWorker] 🎯 Verified AI reply in wild! Likes: %d, Replies: %d", perf.LikeCountIncrease, perf.ReplyCountIncrease)
|
||||
|
||||
// Epic 13 AI Tone Engine: Autonomous Style Cloning for proven viral comments
|
||||
if perf.LikeCountIncrease >= 10 {
|
||||
log.Printf("[PerformanceWorker] Reply went viral! Asking AI to reverse-engineer linguistic styling.")
|
||||
|
||||
styleProfile, err := w.aiSvc.ExtractStyle(ctx, reply.Content)
|
||||
if err == nil && styleProfile != "" {
|
||||
err = w.repo.SaveStyleExtraction(reply.UserID, styleProfile)
|
||||
if err != nil {
|
||||
log.Printf("[PerformanceWorker] Error saving style database mapping: %v", err)
|
||||
} else {
|
||||
log.Printf("[PerformanceWorker] Successfully built user style clone: %s", styleProfile)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Even if not found (maybe they edited heavily or didn't actually post it), we mark it as checked to prevent infinite re-checking
|
||||
if !found {
|
||||
perf := &model.ReplyPerformance{
|
||||
ReplyID: reply.ID,
|
||||
UserID: reply.UserID,
|
||||
CheckTime: time.Now(),
|
||||
}
|
||||
w.repo.SaveReplyPerformance(perf)
|
||||
}
|
||||
|
||||
w.client.JitterDelay(3000, 8000)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user