feat: 部署初版测试
Some checks failed
Extension Build & Release / build (push) Failing after 1m5s
Backend Deploy (Go + Docker) / deploy (push) Failing after 1m40s
Web Console Deploy (Vue 3 + Vite) / deploy (push) Has been cancelled

This commit is contained in:
zs
2026-03-02 21:25:21 +08:00
parent db3abb3174
commit 8cf6cb944b
97 changed files with 10250 additions and 209 deletions

View File

@@ -0,0 +1,147 @@
package worker
import (
"context"
"fmt"
"log"
"time"
"github.com/zs/InsightReply/internal/model"
"github.com/zs/InsightReply/internal/repository"
"github.com/zs/InsightReply/internal/scraper"
)
type MonitorWorker struct {
repo *repository.CompetitorMonitorRepository
tweetRepo *repository.TweetRepository
client *scraper.ScraperClient
baseUrl string
}
func NewMonitorWorker(repo *repository.CompetitorMonitorRepository, tweetRepo *repository.TweetRepository) *MonitorWorker {
return &MonitorWorker{
repo: repo,
tweetRepo: tweetRepo,
client: scraper.NewScraperClient(),
baseUrl: "https://x.beenglish.eu.org", // Self-hosted Nitter instance
}
}
// Start begins the background job loop. This should be run in a goroutine.
func (w *MonitorWorker) Start(ctx context.Context, interval time.Duration) {
log.Printf("[MonitorWorker] Starting background scraping loop every %v", interval)
ticker := time.NewTicker(interval)
defer ticker.Stop()
// Initial run
w.runCycle(ctx)
for {
select {
case <-ctx.Done():
log.Println("[MonitorWorker] Stopping background scraping loop")
return
case <-ticker.C:
w.runCycle(ctx)
}
}
}
func (w *MonitorWorker) runCycle(ctx context.Context) {
log.Println("[MonitorWorker] Starting scrape cycle...")
monitors, err := w.repo.ListAllActive()
if err != nil {
log.Printf("[MonitorWorker] Error fetching active monitors: %v", err)
return
}
if len(monitors) == 0 {
log.Println("[MonitorWorker] No active monitors found. Skipping cycle.")
return
}
for _, monitor := range monitors {
// Stop processing if context cancelled (e.g., app shutdown)
select {
case <-ctx.Done():
return
default:
}
// Determine Scraping Strategy
var url string
// URL encode the brand name which acts as our keyword
keyword := monitor.BrandName
if monitor.XHandle != "" {
if monitor.XHandle == keyword || keyword == "" {
// Standard profile timeline scraping
log.Printf("[MonitorWorker] Scraping timeline for account @%s", monitor.XHandle)
url = fmt.Sprintf("%s/%s", w.baseUrl, monitor.XHandle)
} else {
// Combo scraping: Keyword + Specific Account
log.Printf("[MonitorWorker] Scraping combo: '%s' from @%s", keyword, monitor.XHandle)
url = fmt.Sprintf("%s/search?f=tweets&q=%s+from%%3A%s", w.baseUrl, keyword, monitor.XHandle)
}
} else if keyword != "" {
// Global search for Keyword across X
log.Printf("[MonitorWorker] Scraping global search for keyword: '%s'", keyword)
url = fmt.Sprintf("%s/search?f=tweets&q=%s", w.baseUrl, keyword)
} else {
continue // Invalid monitor config
}
w.scrapeAndLog(url)
// Anti-Ban: Jitter delay between requests (3s to 8s)
w.client.JitterDelay(3000, 8000)
}
log.Println("[MonitorWorker] Scrape cycle completed.")
}
func (w *MonitorWorker) scrapeAndLog(url string) {
htmlData, err := w.client.Fetch(url)
if err != nil {
log.Printf("[MonitorWorker] Error scraping %s: %v", url, err)
return
}
tweets, err := scraper.ParseTimeline(htmlData)
if err != nil {
log.Printf("[MonitorWorker] Error parsing HTML for %s: %v", url, err)
return
}
log.Printf("[MonitorWorker] Extracted %d tweets from %s", len(tweets), url)
// Epic 6: Upsert into tracking database
upsertCount := 0
for _, rawTweet := range tweets {
tweet := &model.Tweet{
XTweetID: rawTweet.ID,
AuthorHandle: rawTweet.Handle,
Content: rawTweet.Content,
PostedAt: rawTweet.CreatedAt,
LikeCount: rawTweet.Likes,
RetweetCount: rawTweet.Retweets,
ReplyCount: rawTweet.Replies,
CrawlQueue: "normal",
IsProcessed: false,
LastCrawledAt: time.Now(),
}
// Save/Update in DB
err := w.tweetRepo.Upsert(tweet)
if err != nil {
log.Printf("[MonitorWorker] Error UPSERTing tweet %s: %v", tweet.XTweetID, err)
} else {
upsertCount++
}
}
log.Printf("[MonitorWorker] Successfully Upserted %d/%d tweets to the database.", upsertCount, len(tweets))
}

View File

@@ -0,0 +1,142 @@
package worker
import (
"context"
"fmt"
"log"
"strings"
"time"
"github.com/zs/InsightReply/internal/model"
"github.com/zs/InsightReply/internal/repository"
"github.com/zs/InsightReply/internal/scraper"
"github.com/zs/InsightReply/internal/service"
)
type PerformanceWorker struct {
repo *repository.ReplyRepository
client *scraper.ScraperClient
aiSvc *service.AIService
baseUrl string
}
func NewPerformanceWorker(repo *repository.ReplyRepository, aiSvc *service.AIService) *PerformanceWorker {
return &PerformanceWorker{
repo: repo,
client: scraper.NewScraperClient(),
aiSvc: aiSvc,
baseUrl: "https://x.beenglish.eu.org",
}
}
// Start begins the 24h retroactive performance checking loop
func (w *PerformanceWorker) Start(ctx context.Context, interval time.Duration) {
log.Printf("[PerformanceWorker] Starting retroactive engagement tracking every %v", interval)
ticker := time.NewTicker(interval)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
log.Println("[PerformanceWorker] Stopping background performance loop")
return
case <-ticker.C:
w.runCycle(ctx)
}
}
}
func (w *PerformanceWorker) runCycle(ctx context.Context) {
pending, err := w.repo.GetPendingPerformanceChecks()
if err != nil {
log.Printf("[PerformanceWorker] Error fetching pending checks: %v", err)
return
}
for _, reply := range pending {
// Stop processing if context cancelled
select {
case <-ctx.Done():
return
default:
}
xTweetID, err := w.repo.GetTweetXTweetID(reply.TweetID)
if err != nil {
continue
}
// Scrape the specific thread
// Nitter handles /i/status/12345 generic routes
url := fmt.Sprintf("%s/i/status/%s", w.baseUrl, xTweetID)
log.Printf("[PerformanceWorker] Checking thread %s for user's AI reply performance", url)
htmlData, err := w.client.Fetch(url)
if err != nil {
w.client.JitterDelay(2000, 5000)
continue
}
threadReplies, err := scraper.ParseTimeline(htmlData)
if err != nil {
continue
}
// Search for the user's generated text within the thread replies
found := false
for _, threadReply := range threadReplies {
// Basic similarity check: if 50% of the AI sentence is present
// Real implementation might use Levenshtein distance, but strings.Contains on chunks works for MVP
snippet := reply.Content
if len(snippet) > 20 {
snippet = snippet[:20]
}
if strings.Contains(threadReply.Content, snippet) {
found = true
// WE FOUND OUR REPLY! Record its metrics
perf := &model.ReplyPerformance{
ReplyID: reply.ID,
UserID: reply.UserID,
LikeCountIncrease: threadReply.Likes,
ReplyCountIncrease: threadReply.Replies,
CheckTime: time.Now(),
}
w.repo.SaveReplyPerformance(perf)
log.Printf("[PerformanceWorker] 🎯 Verified AI reply in wild! Likes: %d, Replies: %d", perf.LikeCountIncrease, perf.ReplyCountIncrease)
// Epic 13 AI Tone Engine: Autonomous Style Cloning for proven viral comments
if perf.LikeCountIncrease >= 10 {
log.Printf("[PerformanceWorker] Reply went viral! Asking AI to reverse-engineer linguistic styling.")
styleProfile, err := w.aiSvc.ExtractStyle(ctx, reply.Content)
if err == nil && styleProfile != "" {
err = w.repo.SaveStyleExtraction(reply.UserID, styleProfile)
if err != nil {
log.Printf("[PerformanceWorker] Error saving style database mapping: %v", err)
} else {
log.Printf("[PerformanceWorker] Successfully built user style clone: %s", styleProfile)
}
}
}
break
}
}
// Even if not found (maybe they edited heavily or didn't actually post it), we mark it as checked to prevent infinite re-checking
if !found {
perf := &model.ReplyPerformance{
ReplyID: reply.ID,
UserID: reply.UserID,
CheckTime: time.Now(),
}
w.repo.SaveReplyPerformance(perf)
}
w.client.JitterDelay(3000, 8000)
}
}