News Intelligence Agent - 뉴스 인텔리전스
Role
/news 페이지에서 배치로 여러 뉴스를 동시 분석하여 시장 전체 흐름을 파악합니다.
Core Capabilities
1. Batch News Processing
async def analyze_batch(
    news_articles: List[NewsArticle],
    batch_size: int = 50
) -> Dict:
    """Analyze articles concurrently, one fixed-size chunk at a time.

    Args:
        news_articles: Articles to analyze.
        batch_size: Maximum number of articles analyzed concurrently
            within a single chunk.

    Returns:
        The value produced by ``aggregate_batch_results`` for the
        per-article results, in input order.
    """
    collected = []
    # Chunks run one after another so that at most ``batch_size`` analyses
    # are in flight at once (keeps us under API rate limits).
    for start in range(0, len(news_articles), batch_size):
        chunk = news_articles[start:start + batch_size]
        pending = [analyze_single_article(item) for item in chunk]
        collected += await asyncio.gather(*pending)
    return aggregate_batch_results(collected)
2. Sentiment Analysis
Sentiment Scoring
def calculate_sentiment(text: str) -> float:
    """Calculate a keyword-based sentiment score between -1 and +1.

    The score is ``(positive hits - negative hits) / total hits``, where a
    hit is a case-insensitive substring occurrence of one of the keywords.

    Args:
        text: Article text. ``None`` or an empty string is treated as
            carrying no sentiment.

    Returns:
        A float in [-1.0, 1.0]; 0.0 when no keyword occurs at all.
    """
    # Positive keywords
    positive = ["surge", "beat", "record", "growth", "bullish", "upgrade"]
    # Negative keywords
    negative = ["plunge", "miss", "loss", "decline", "bearish", "downgrade"]

    # Article content may be missing; there is nothing to score then.
    if not text:
        return 0.0

    # Lower-case once instead of once per keyword.
    lowered = text.lower()
    pos_count = sum(lowered.count(word) for word in positive)
    neg_count = sum(lowered.count(word) for word in negative)

    total = pos_count + neg_count
    if total == 0:
        return 0.0

    # |pos_count - neg_count| <= total, so the ratio is already in [-1, 1]
    # and needs no extra clamping.
    return (pos_count - neg_count) / total
Sentiment Categories
# Sentiment label -> (lower bound, upper bound) of the score range.
# Adjacent ranges share their boundary value (e.g. 0.3 and 0.6); which side a
# boundary score falls on is decided by the code that consumes this table.
SENTIMENT_LEVELS = {
    "VERY_POSITIVE": (0.6, 1.0),
    "POSITIVE": (0.3, 0.6),
    "NEUTRAL": (-0.3, 0.3),
    "NEGATIVE": (-0.6, -0.3),
    "VERY_NEGATIVE": (-1.0, -0.6),
}
3. Keyword Extraction
from sklearn.feature_extraction.text import TfidfVectorizer
def extract_keywords(texts: List[str], top_n: int = 10) -> List[str]:
    """Extract the most important keywords from a corpus using TF-IDF.

    Args:
        texts: Documents to analyze. ``None`` and empty entries are skipped.
        top_n: Maximum number of keywords to return.

    Returns:
        Up to ``top_n`` unigrams/bigrams as plain ``str``, ordered from the
        highest to the lowest summed TF-IDF score. Empty when there is
        nothing to analyze or ``top_n`` is not positive.
    """
    # Vectorizing an empty corpus raises, so bail out early instead.
    documents = [text for text in texts if text]
    if not documents or top_n <= 0:
        return []

    # NOTE: max_features keeps only the top_n terms by corpus term frequency,
    # so the TF-IDF scores below decide the ordering, not the selection.
    vectorizer = TfidfVectorizer(
        max_features=top_n,
        stop_words='english',
        ngram_range=(1, 2),  # Unigrams and bigrams
    )

    tfidf_matrix = vectorizer.fit_transform(documents)
    feature_names = vectorizer.get_feature_names_out()

    # Sum each term's TF-IDF weight over all documents, then rank descending.
    scores = tfidf_matrix.sum(axis=0).A1
    top_indices = scores.argsort()[-top_n:][::-1]

    # Convert NumPy string scalars to built-in str for downstream consumers.
    return [str(feature_names[i]) for i in top_indices]
4. Theme Detection
from sklearn.cluster import KMeans from sklearn.feature_extraction.text import TfidfVectorizer
def detect_themes(articles: List[str], n_themes: int = 5) -> List[Dict]:
    """Cluster articles into themes with TF-IDF features and K-Means.

    Args:
        articles: Article texts. ``None`` and empty entries are skipped.
        n_themes: Desired number of themes. Capped at the number of usable
            articles, since there cannot be more clusters than samples.

    Returns:
        One dict per theme with ``theme_id``, ``keywords`` (top 5 terms of
        the cluster center), ``article_count`` and ``theme_name``, sorted by
        ``article_count`` in descending order. Empty when there is nothing
        to cluster or ``n_themes`` is not positive.
    """
    documents = [text for text in articles if text]
    if not documents or n_themes <= 0:
        return []

    # Vectorize
    vectorizer = TfidfVectorizer(max_features=100, stop_words='english')
    tfidf_matrix = vectorizer.fit_transform(documents)

    # Cluster; asking for more clusters than samples makes KMeans raise.
    n_clusters = min(n_themes, len(documents))
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans.fit(tfidf_matrix)

    # Extract theme keywords
    themes = []
    feature_names = vectorizer.get_feature_names_out()

    for i, cluster_center in enumerate(kmeans.cluster_centers_):
        # Highest-weighted terms of the centroid describe the theme.
        top_indices = cluster_center.argsort()[-5:][::-1]
        theme_keywords = [str(feature_names[idx]) for idx in top_indices]

        # Built-in int (not a NumPy integer) so the result is JSON-serializable.
        article_count = int((kmeans.labels_ == i).sum())

        themes.append({
            "theme_id": i,
            "keywords": theme_keywords,
            "article_count": article_count,
            "theme_name": generate_theme_name(theme_keywords)
        })

    return sorted(themes, key=lambda x: x['article_count'], reverse=True)
5. Ticker Buzz Score
def calculate_ticker_buzz(
    ticker: str,
    news_articles: List[NewsArticle],
    timeframe_hours: int = 24
) -> Dict:
    """Calculate how much a ticker is being discussed.

    Args:
        ticker: Ticker symbol to look for (matched case-insensitively).
        news_articles: Articles to scan; each must expose ``ticker``,
            ``content``, ``created_at`` and ``sentiment_score``.
        timeframe_hours: Only articles at most this many hours old
            contribute to the buzz score.

    Returns:
        Dict with ``ticker``, ``buzz_score`` (0-100), ``mention_count``,
        ``avg_sentiment``, ``timeframe_hours`` and ``trending``
        ("UP" when the buzz score exceeds 50, otherwise "NORMAL").
    """
    import re

    # Match the symbol as a whole token only; a plain substring test would
    # make short tickers such as "A" match almost every article.
    symbol_pattern = re.compile(
        r'(?<!\w)' + re.escape(ticker.upper()) + r'(?!\w)'
    )

    # Filter articles mentioning ticker
    ticker_articles = [
        a for a in news_articles
        if symbol_pattern.search((a.ticker or '').upper())
        or symbol_pattern.search((a.content or '').upper())
    ]

    # Recency weight (more recent = higher weight)
    # NOTE(review): naive local time; assumes created_at is naive too - confirm.
    now = datetime.now()
    weighted_mentions = 0

    for article in ticker_articles:
        # Future-dated timestamps are treated as "just now" so that a single
        # article never weighs more than 1.
        hours_ago = max(
            0.0, (now - article.created_at).total_seconds() / 3600
        )

        if hours_ago <= timeframe_hours:
            # Exponential decay
            weight = math.exp(-hours_ago / (timeframe_hours / 2))
            weighted_mentions += weight

    # Normalize to 0-100 scale
    buzz_score = min(100, weighted_mentions * 10)

    # Sentiment breakdown; a score of exactly 0.0 is a valid neutral value
    # and must be kept, only missing scores are skipped.
    sentiments = [
        a.sentiment_score for a in ticker_articles
        if a.sentiment_score is not None
    ]
    avg_sentiment = sum(sentiments) / len(sentiments) if sentiments else 0

    return {
        "ticker": ticker,
        "buzz_score": buzz_score,
        "mention_count": len(ticker_articles),
        "avg_sentiment": avg_sentiment,
        "timeframe_hours": timeframe_hours,
        "trending": "UP" if buzz_score > 50 else "NORMAL"
    }
Decision Framework
Step 1: Fetch News Articles
    articles = db.query(NewsArticle).filter(
        NewsArticle.created_at >= datetime.now() - timedelta(hours=24)
    ).all()
Step 2: Batch Sentiment Analysis
    FOR each article in articles:
        sentiment = calculate_sentiment(article.content)
        article.sentiment_score = sentiment
        article.sentiment_label = categorize_sentiment(sentiment)
Step 3: Extract Keywords
    all_text = [a.content for a in articles]
    keywords = extract_keywords(all_text, top_n=20)
Step 4: Detect Themes
    themes = detect_themes([a.headline + ' ' + a.content for a in articles])
Step 5: Calculate Ticker Buzz
    unique_tickers = set(a.ticker for a in articles if a.ticker)
    buzz_scores = {}
    FOR ticker in unique_tickers:
        buzz_scores[ticker] = calculate_ticker_buzz(ticker, articles)
Step 6: Aggregate Results
    return {
        "total_articles": len(articles),
        "sentiment_distribution": count_by_sentiment(articles),
        "top_keywords": keywords,
        "trending_themes": themes,
        "ticker_buzz": buzz_scores,
        "timestamp": datetime.now()
    }
Output Format
{ "analysis_timestamp": "2025-12-21T13:00:00Z", "timeframe": "last_24_hours", "total_articles_analyzed": 237,
"sentiment_distribution": { "VERY_POSITIVE": 45, "POSITIVE": 89, "NEUTRAL": 67, "NEGATIVE": 28, "VERY_NEGATIVE": 8 },
"market_sentiment_summary": { "overall_score": 0.32, "overall_label": "POSITIVE", "confidence": 0.85, "interpretation": "시장 전반적으로 긍정적 뉴스 우세" },
"top_keywords": [ { "keyword": "ai growth", "frequency": 67, "importance_score": 0.92 }, { "keyword": "earnings beat", "frequency": 54, "importance_score": 0.88 }, { "keyword": "fed rate", "frequency": 48, "importance_score": 0.85 }, { "keyword": "semiconductor", "frequency": 42, "importance_score": 0.80 }, { "keyword": "tech rally", "frequency": 38, "importance_score": 0.75 } ],
"trending_themes": [ { "theme_id": 0, "theme_name": "AI 붐", "keywords": ["ai", "chip", "nvidia", "demand", "growth"], "article_count": 78, "avg_sentiment": 0.68, "interpretation": "AI 관련 긍정적 뉴스 주도" }, { "theme_id": 1, "theme_name": "Fed 금리 논의", "keywords": ["fed", "rate", "inflation", "policy", "powell"], "article_count": 56, "avg_sentiment": 0.12, "interpretation": "금리 관련 중립적 논의" }, { "theme_id": 2, "theme_name": "실적 시즌", "keywords": ["earnings", "beat", "guidance", "revenue", "profit"], "article_count": 43, "avg_sentiment": 0.45, "interpretation": "실적 호조 뉴스 다수" } ],
"ticker_buzz_rankings": [ { "rank": 1, "ticker": "NVDA", "buzz_score": 92, "mention_count": 45, "avg_sentiment": 0.75, "trending": "UP", "summary": "AI 수요 급증 관련 압도적 언급" }, { "rank": 2, "ticker": "AAPL", "buzz_score": 78, "mention_count": 38, "avg_sentiment": 0.58, "trending": "UP", "summary": "iPhone 판매 호조 뉴스" }, { "rank": 3, "ticker": "TSLA", "buzz_score": 65, "mention_count": 32, "avg_sentiment": -0.25, "trending": "UP", "summary": "가격 인하 관련 우려 섞인 논의" } ],
"sector_sentiment": { "Technology": { "article_count": 128, "avg_sentiment": 0.52, "label": "POSITIVE", "top_tickers": ["NVDA", "AAPL", "MSFT"] }, "Finance": { "article_count": 45, "avg_sentiment": 0.18, "label": "NEUTRAL", "top_tickers": ["JPM", "BAC", "GS"] }, "Healthcare": { "article_count": 34, "avg_sentiment": 0.35, "label": "POSITIVE", "top_tickers": ["JNJ", "PFE", "MRNA"] } },
"alerts": [ { "type": "HIGH_BUZZ", "ticker": "NVDA", "message": "NVDA buzz score 92 (매우 높음)", "severity": "INFO" }, { "type": "SENTIMENT_SPIKE", "theme": "AI 붐", "message": "AI 관련 뉴스 sentiment +0.68 (매우 긍정)", "severity": "INFO" } ] }
Examples
Example 1: Tech Rally Day
Input: 237 articles (last 24h)
Output:
- Overall Sentiment: +0.45 (POSITIVE)
- Top Theme: "AI Growth" (78 articles)
- Top Buzz: NVDA (92), AAPL (78), MSFT (65)
- Keywords: "ai growth", "earnings beat", "chip demand"
Example 2: Market Correction Day
Input: 189 articles
Output:
- Overall Sentiment: -0.38 (NEGATIVE)
- Top Theme: "Fed Rate Hike Fears" (92 articles)
- Top Buzz: SPY (88), VIX (76), TLT (54)
- Keywords: "rate hike", "inflation", "recession fears"
Guidelines
Do's ✅
- 배치 처리: 효율성 극대화
- Ticker Buzz 추적: 시장 주목도 파악
- Theme Detection: 숨겨진 패턴 발견
- Sector Breakdown: 섹터별 sentiment
Don'ts ❌
- 단일 기사만 분석 금지 (Quick/Deep Reasoning 역할)
- Theme 너무 세분화 금지 (5개 이내)
- Buzz score 과신 금지 (quality over quantity)
- Historical context 무시 금지
Integration
Batch Processing Endpoint
@router.post("/api/news/batch-analyze")
async def batch_analyze_news(
    timeframe_hours: int = 24,
    db: Session = Depends(get_db)
):
    """Run the News Intelligence Agent over recently created articles.

    Args:
        timeframe_hours: How many hours back from now to include articles.
        db: Database session used to load the articles.

    Returns:
        Whatever ``NewsIntelligenceAgent.execute`` returns for the batch.
    """
    # Everything created at or after this instant belongs to the batch.
    window_start = datetime.now() - timedelta(hours=timeframe_hours)
    recent_query = db.query(NewsArticle).filter(
        NewsArticle.created_at >= window_start
    )

    # Hand the loaded articles and the window size to the agent.
    payload = {
        'articles': recent_query.all(),
        'timeframe_hours': timeframe_hours
    }
    return await NewsIntelligenceAgent().execute(payload)
Real-Time Updates (WebSocket)
from fastapi import WebSocket
@router.websocket("/ws/news-intel")
async def news_intel_websocket(
    websocket: WebSocket,
    db: Session = Depends(get_db)
):
    """Stream news intelligence updates to a connected client.

    Accepts the connection, then pushes a fresh analysis of the last hour
    of news every 5 minutes until the client disconnects.

    Args:
        websocket: The client connection.
        db: Database session, injected by FastAPI. It has to be passed on
            explicitly: calling ``batch_analyze_news`` as a plain function
            bypasses dependency injection, so its ``db`` default would be
            the ``Depends`` marker instead of a session.
    """
    from fastapi import WebSocketDisconnect

    await websocket.accept()

    # NOTE(review): the session lives as long as the socket; confirm that is
    # acceptable for the connection pool, or open one per iteration instead.
    try:
        while True:
            # Run analysis every 5 minutes
            result = await batch_analyze_news(timeframe_hours=1, db=db)
            await websocket.send_json(result)

            await asyncio.sleep(300)  # 5 minutes
    except WebSocketDisconnect:
        # Client went away; end the stream quietly.
        return
Performance Metrics
- Batch Processing Speed: 목표 < 10초 for 100 articles
- Sentiment Accuracy: > 80%
- Theme Detection Quality: > 75% (사람 판단과 일치)
- Ticker Buzz Precision: > 85%
Comparison
| Agent | Scope | Speed | Use Case |
|---|---|---|---|
| News Intelligence | 배치 (100+ articles) | 10초 | 시장 전체 흐름 |
| Quick Analyzer | 단일 ticker | 5초 | 개별 종목 확인 |
| Deep Reasoning | 단일 news | 30초 | 중요한 뉴스 심층 분석 |
Version History
- v1.0 (2025-12-21): Initial release with batch processing and theme detection