first commit

2026-01-26 12:34:00 +01:00
commit e64465a7e6
29 changed files with 2952 additions and 0 deletions

3
src/__init__.py Normal file

@@ -0,0 +1,3 @@
"""News Agent - AI-powered daily tech news aggregator"""
__version__ = "0.1.0"

1
src/aggregator/__init__.py Normal file

@@ -0,0 +1 @@
"""News aggregation from various sources"""

162
src/aggregator/rss_fetcher.py Normal file

@@ -0,0 +1,162 @@
"""RSS feed fetching and parsing"""
import feedparser
import httpx
from datetime import datetime, timedelta, timezone
from hashlib import sha256
from typing import Optional
from email.utils import parsedate_to_datetime
from ..config import RSSSource
from ..storage.models import Article
from ..logger import get_logger
logger = get_logger()
class RSSFetcher:
"""Fetch and parse RSS feeds"""
def __init__(self, timeout: int = 30, hours_lookback: int = 24):
"""
Initialize RSS fetcher
Args:
timeout: HTTP request timeout in seconds
hours_lookback: How many hours back to fetch articles
"""
self.timeout = timeout
self.hours_lookback = hours_lookback
self.client = httpx.AsyncClient(
timeout=timeout,
follow_redirects=True,
headers={"User-Agent": "NewsAgent/1.0 RSS Reader"},
)
async def close(self):
"""Close HTTP client"""
await self.client.aclose()
async def fetch(self, source: RSSSource) -> list[Article]:
"""
Fetch and parse articles from RSS feed
Args:
source: RSS source configuration
Returns:
List of Article objects from the feed
"""
try:
logger.info(f"Fetching RSS feed: {source.name}")
response = await self.client.get(str(source.url))
response.raise_for_status()
# Parse feed
feed = feedparser.parse(response.text)
if feed.bozo:
logger.warning(f"Feed parsing warning for {source.name}: {feed.bozo_exception}")
articles = []
cutoff_time = datetime.now(timezone.utc) - timedelta(hours=self.hours_lookback)
for entry in feed.entries:
try:
article = self._parse_entry(entry, source)
if article and article.published >= cutoff_time:
articles.append(article)
except Exception as e:
logger.warning(f"Failed to parse entry from {source.name}: {e}")
continue
logger.info(f"Fetched {len(articles)} articles from {source.name}")
return articles
except httpx.HTTPError as e:
logger.error(f"HTTP error fetching {source.name}: {e}")
return []
except Exception as e:
logger.error(f"Unexpected error fetching {source.name}: {e}")
return []
def _parse_entry(
self, entry: feedparser.FeedParserDict, source: RSSSource
) -> Optional[Article]:
"""Parse a single RSS entry into an Article"""
# Get URL
url = entry.get("link")
if not url:
return None
# Generate unique ID from URL
article_id = sha256(url.encode()).hexdigest()
# Get title
title = entry.get("title", "Untitled")
# Get published date
published = self._parse_date(entry)
if not published:
published = datetime.now(timezone.utc)
# Get summary/content
summary = entry.get("summary", None)
content = entry.get("content", [{}])[0].get("value", entry.get("description", title))
# Fall back to the summary when the feed only provided the title as content
if content == title:
content = summary or title
return Article(
id=article_id,
url=url,
title=title,
summary=summary,
content=content,
published=published,
source=source.name,
category=source.category,
fetched_at=datetime.now(timezone.utc),
)
def _parse_date(self, entry: feedparser.FeedParserDict) -> Optional[datetime]:
"""Parse published date from entry"""
# Try different date fields
for date_field in ["published_parsed", "updated_parsed", "created_parsed"]:
if date_field in entry and entry[date_field]:
try:
time_tuple = entry[date_field]
dt = datetime(*time_tuple[:6], tzinfo=timezone.utc)
return dt
except (TypeError, ValueError):
continue
# Try parsing date strings
for date_field in ["published", "updated", "created"]:
if date_field in entry and entry[date_field]:
try:
dt = parsedate_to_datetime(entry[date_field])
# Treat naive dates as UTC so comparison with the aware cutoff can't raise
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
return dt
except (TypeError, ValueError):
continue
return None
async def fetch_all(self, sources: list[RSSSource]) -> list[Article]:
"""
Fetch articles from multiple RSS sources, one source at a time
Args:
sources: List of RSS sources to fetch
Returns:
Combined list of articles from all sources
"""
all_articles = []
for source in sources:
articles = await self.fetch(source)
all_articles.extend(articles)
logger.info(f"Total articles fetched from all sources: {len(all_articles)}")
return all_articles
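
fetch_all awaits each feed in turn; since the class already shares one AsyncClient, a concurrent variant is a small change. A sketch (fetch_all_concurrent is an illustrative name, not part of this commit):

import asyncio

async def fetch_all_concurrent(self, sources: list[RSSSource]) -> list[Article]:
    """Fetch every source at once; fetch() already returns [] on failure."""
    results = await asyncio.gather(*(self.fetch(source) for source in sources))
    all_articles = [article for batch in results for article in batch]
    logger.info(f"Total articles fetched from all sources: {len(all_articles)}")
    return all_articles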

1
src/ai/__init__.py Normal file

@@ -0,0 +1 @@
"""AI processing using OpenRouter"""

117
src/ai/client.py Normal file

@@ -0,0 +1,117 @@
"""OpenRouter API client"""
import json
from typing import Any
from openai import AsyncOpenAI
from ..config import get_config
from ..logger import get_logger
logger = get_logger()
class OpenRouterClient:
"""Async client for OpenRouter API"""
def __init__(self):
config = get_config()
env = config.env
self.client = AsyncOpenAI(
base_url=config.ai.base_url,
api_key=env.openrouter_api_key,
default_headers={
**({"HTTP-Referer": env.openrouter_site_url} if env.openrouter_site_url else {}),
**({"X-Title": env.openrouter_site_name} if env.openrouter_site_name else {}),
},
)
self.model = config.ai.model
logger.info(f"Initialized OpenRouter client with model: {self.model}")
async def chat_completion(
self,
system_prompt: str,
user_prompt: str,
temperature: float = 0.7,
max_tokens: int = 1000,
json_mode: bool = False,
) -> str:
"""
Send chat completion request
Args:
system_prompt: System instruction
user_prompt: User message
temperature: Sampling temperature (0-2)
max_tokens: Maximum tokens to generate
json_mode: Enable JSON response format
Returns:
Response content as string
"""
try:
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt},
]
kwargs: dict[str, Any] = {
"model": self.model,
"messages": messages,
"temperature": temperature,
"max_tokens": max_tokens,
}
if json_mode:
kwargs["response_format"] = {"type": "json_object"}
response = await self.client.chat.completions.create(**kwargs)
content = response.choices[0].message.content
if not content:
raise ValueError("Empty response from API")
# Log token usage
if response.usage:
logger.debug(
f"Tokens used - Prompt: {response.usage.prompt_tokens}, "
f"Completion: {response.usage.completion_tokens}, "
f"Total: {response.usage.total_tokens}"
)
return content
except Exception as e:
logger.error(f"OpenRouter API error: {e}")
raise
async def chat_completion_json(
self, system_prompt: str, user_prompt: str, temperature: float = 0.3, max_tokens: int = 500
) -> dict[str, Any]:
"""
Send chat completion request expecting JSON response
Args:
system_prompt: System instruction
user_prompt: User message
temperature: Sampling temperature
max_tokens: Maximum tokens to generate
Returns:
Parsed JSON response as dictionary
"""
content = await self.chat_completion(
system_prompt=system_prompt,
user_prompt=user_prompt,
temperature=temperature,
max_tokens=max_tokens,
json_mode=True,
)
try:
return json.loads(content)
except json.JSONDecodeError as e:
logger.error(f"Failed to parse JSON response: {content}")
raise ValueError(f"Invalid JSON response: {e}") from e
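
A minimal usage sketch for the client, assuming a valid config.yaml and an OPENROUTER_API_KEY in the environment (the prompts and names here are illustrative):

import asyncio

async def demo() -> None:
    client = OpenRouterClient()
    data = await client.chat_completion_json(
        system_prompt='Reply with JSON of the form {"answer": "<text>"}.',
        user_prompt="What does RSS stand for?",
    )
    print(data["answer"])

asyncio.run(demo())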

118
src/ai/filter.py Normal file

@@ -0,0 +1,118 @@
"""Article relevance filtering using AI"""
from typing import Optional
from ..storage.models import Article
from ..config import get_config
from ..logger import get_logger
from .client import OpenRouterClient
from .prompts import FILTERING_SYSTEM_PROMPT, FILTERING_USER_PROMPT
logger = get_logger()
class ArticleFilter:
"""Filter articles by relevance using AI"""
def __init__(self, client: OpenRouterClient):
self.client = client
config = get_config()
self.interests = config.ai.interests
self.min_score = config.ai.filtering.min_score
async def score_article(self, article: Article) -> Optional[float]:
"""
Score article relevance (0-10)
Args:
article: Article to score
Returns:
Relevance score or None if scoring fails
"""
try:
# Prepare prompts
system_prompt = FILTERING_SYSTEM_PROMPT.format(
interests="\n".join(f"- {interest}" for interest in self.interests)
)
# Truncate content for API call
content_preview = article.content[:500] + ("..." if len(article.content) > 500 else "")
user_prompt = FILTERING_USER_PROMPT.format(
title=article.title,
source=article.source,
category=article.category,
content=content_preview,
)
# Get score from AI
response = await self.client.chat_completion_json(
system_prompt=system_prompt,
user_prompt=user_prompt,
temperature=0.3,
max_tokens=200,
)
score = float(response.get("score", 0))
reason = response.get("reason", "No reason provided")
logger.debug(f"Article '{article.title}' scored {score:.1f}: {reason}")
return score
except Exception as e:
logger.error(f"Failed to score article '{article.title}': {e}")
return None
async def is_relevant(self, article: Article) -> tuple[bool, Optional[float]]:
"""
Check if article meets relevance threshold
Args:
article: Article to check
Returns:
Tuple of (is_relevant, score)
"""
score = await self.score_article(article)
if score is None:
return False, None
is_relevant = score >= self.min_score
return is_relevant, score
async def filter_articles(
self, articles: list[Article], max_articles: Optional[int] = None
) -> list[tuple[Article, float]]:
"""
Filter and rank articles by relevance
Args:
articles: Articles to filter
max_articles: Maximum number of articles to return
Returns:
List of (article, score) tuples, sorted by score descending
"""
scored_articles: list[tuple[Article, float]] = []
for article in articles:
is_relevant, score = await self.is_relevant(article)
if is_relevant and score is not None:
scored_articles.append((article, score))
# Sort by score descending
scored_articles.sort(key=lambda x: x[1], reverse=True)
# Apply limit if specified
if max_articles:
scored_articles = scored_articles[:max_articles]
logger.info(
f"Filtered {len(articles)} articles down to {len(scored_articles)} relevant ones"
)
return scored_articles
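
filter_articles awaits one scoring call per article in sequence; for large batches a bounded-concurrency variant could sit alongside it. A sketch (score_many and the limit of 5 are illustrative, not part of this commit):

import asyncio

async def score_many(
    self, articles: list[Article], limit: int = 5
) -> list[tuple[Article, Optional[float]]]:
    sem = asyncio.Semaphore(limit)  # cap simultaneous OpenRouter calls

    async def score_one(article: Article) -> tuple[Article, Optional[float]]:
        async with sem:
            return article, await self.score_article(article)

    return await asyncio.gather(*(score_one(a) for a in articles))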

60
src/ai/prompts.py Normal file

@@ -0,0 +1,60 @@
"""Prompt templates for AI processing"""
FILTERING_SYSTEM_PROMPT = """You are a news relevance analyzer. Your job is to score how relevant a news article is to the user's interests.
User Interests:
{interests}
Score the article on a scale of 0-10 based on:
- Direct relevance to stated interests (0-4 points)
- Quality and depth of content (0-3 points)
- Timeliness and importance (0-3 points)
Return ONLY a JSON object with this exact format:
{{"score": <float>, "reason": "<brief explanation>"}}
Be strict - only highly relevant articles should score above 7.0."""
FILTERING_USER_PROMPT = """Article Title: {title}
Source: {source}
Category: {category}
Content Preview: {content}
Score this article's relevance (0-10) and explain why."""
SUMMARIZATION_SYSTEM_PROMPT = """You are a technical news summarizer. Create concise, informative summaries of tech articles.
Guidelines:
- Focus on key facts, findings, and implications
- Include important technical details
- Keep summaries to 2-3 sentences
- Use clear, professional language
- Highlight what makes this newsworthy
Return ONLY the summary text, no additional formatting."""
SUMMARIZATION_USER_PROMPT = """Title: {title}
Source: {source}
Content: {content}
Write a concise 2-3 sentence summary highlighting the key information."""
BATCH_SUMMARIZATION_SYSTEM_PROMPT = """You are a technical news summarizer. Create concise summaries for multiple articles.
For each article, provide a 2-3 sentence summary that:
- Captures the key facts and findings
- Highlights technical details
- Explains why it's newsworthy
Return a JSON array with this exact format:
[
{{"id": "<article_id>", "summary": "<2-3 sentence summary>"}},
...
]"""

72
src/ai/summarizer.py Normal file

@@ -0,0 +1,72 @@
"""Article summarization using AI"""
from ..storage.models import Article
from ..logger import get_logger
from .client import OpenRouterClient
from .prompts import SUMMARIZATION_SYSTEM_PROMPT, SUMMARIZATION_USER_PROMPT
logger = get_logger()
class ArticleSummarizer:
"""Summarize articles using AI"""
def __init__(self, client: OpenRouterClient):
self.client = client
async def summarize(self, article: Article) -> str:
"""
Generate concise summary of article
Args:
article: Article to summarize
Returns:
Summary text (2-3 sentences)
"""
try:
# Truncate content if too long
max_content_length = 2000
content = article.content
if len(content) > max_content_length:
content = content[:max_content_length] + "..."
user_prompt = SUMMARIZATION_USER_PROMPT.format(
title=article.title, source=article.source, content=content
)
summary = await self.client.chat_completion(
system_prompt=SUMMARIZATION_SYSTEM_PROMPT,
user_prompt=user_prompt,
temperature=0.5,
max_tokens=300,
)
logger.debug(f"Summarized article: {article.title}")
return summary.strip()
except Exception as e:
logger.error(f"Failed to summarize article '{article.title}': {e}")
# Fallback to original summary or truncated content
if article.summary:
return article.summary
return article.content[:200] + ("..." if len(article.content) > 200 else "")
async def summarize_batch(self, articles: list[Article]) -> dict[str, str]:
"""
Summarize multiple articles
Args:
articles: List of articles to summarize
Returns:
Dictionary mapping article IDs to summaries
"""
summaries = {}
for article in articles:
summary = await self.summarize(article)
summaries[article.id] = summary
logger.info(f"Summarized {len(summaries)} articles")
return summaries
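
Note that prompts.py defines BATCH_SUMMARIZATION_SYSTEM_PROMPT, but summarize_batch above never uses it and simply loops over summarize(). One way to wire the batch prompt into a single API call is sketched below (untested; it assumes BATCH_SUMMARIZATION_SYSTEM_PROMPT is added to this module's imports, and it skips json_mode because the prompt asks for a top-level JSON array):

import json

async def summarize_batch_single_call(self, articles: list[Article]) -> dict[str, str]:
    # Pack all articles into one prompt, truncating content to bound token usage
    user_prompt = "\n\n".join(
        f"Article ID: {a.id}\nTitle: {a.title}\nContent: {a.content[:1000]}"
        for a in articles
    )
    response = await self.client.chat_completion(
        system_prompt=BATCH_SUMMARIZATION_SYSTEM_PROMPT,
        user_prompt=user_prompt,
        temperature=0.5,
        max_tokens=300 * len(articles),
    )
    items = json.loads(response)  # expected: [{"id": ..., "summary": ...}, ...]
    return {item["id"]: item["summary"] for item in items}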

156
src/config.py Normal file

@@ -0,0 +1,156 @@
"""Configuration management for News Agent"""
import yaml
from pathlib import Path
from typing import Any
from pydantic import BaseModel, Field, HttpUrl
from pydantic_settings import BaseSettings, SettingsConfigDict
class RSSSource(BaseModel):
"""RSS feed source configuration"""
name: str
url: HttpUrl
category: str
class AIFilteringConfig(BaseModel):
"""AI filtering configuration"""
enabled: bool = True
min_score: float = Field(ge=0, le=10, default=6.5)
max_articles: int = Field(ge=1, default=15)
class AIConfig(BaseModel):
"""AI processing configuration"""
provider: str = "openrouter"
base_url: str = "https://openrouter.ai/api/v1"
model: str = "google/gemini-flash-1.5"
filtering: AIFilteringConfig
interests: list[str]
class SMTPConfig(BaseModel):
"""SMTP server configuration"""
host: str = "localhost"
port: int = 25
use_tls: bool = False
use_ssl: bool = False
username: str | None = None
password: str | None = None
class EmailConfig(BaseModel):
"""Email configuration"""
to: str
from_: str = Field(alias="from")
from_name: str = "Daily Tech News Agent"
subject_template: str = "Tech News Digest - {date}"
smtp: SMTPConfig
class ScheduleConfig(BaseModel):
"""Schedule configuration"""
time: str = "07:00"
timezone: str = "Europe/Oslo"
class DatabaseConfig(BaseModel):
"""Database configuration"""
path: str = "data/articles.db"
retention_days: int = 30
class LoggingConfig(BaseModel):
"""Logging configuration"""
level: str = "INFO"
file: str = "data/logs/news-agent.log"
max_bytes: int = 10485760
backup_count: int = 5
class EnvSettings(BaseSettings):
"""Environment variable settings"""
model_config = SettingsConfigDict(env_file=".env", case_sensitive=False, extra="ignore")
openrouter_api_key: str
openrouter_site_url: str | None = None
openrouter_site_name: str | None = None
smtp_username: str | None = None
smtp_password: str | None = None
error_notification_email: str | None = None
class Config:
"""Main configuration manager"""
def __init__(self, config_path: str | Path = "config.yaml"):
"""Load configuration from YAML file and environment variables"""
self.config_path = Path(config_path)
# Load YAML configuration
with open(self.config_path) as f:
self._config: dict[str, Any] = yaml.safe_load(f)
# Load environment variables
self.env = EnvSettings()
@property
def rss_sources(self) -> list[RSSSource]:
"""Get all RSS sources"""
return [RSSSource(**src) for src in self._config["sources"]["rss"]]
@property
def ai(self) -> AIConfig:
"""Get AI configuration"""
return AIConfig(**self._config["ai"])
@property
def email(self) -> EmailConfig:
"""Get email configuration"""
email_config = self._config["email"].copy()
# Merge SMTP credentials from environment variables
if self.env.smtp_username:
email_config["smtp"]["username"] = self.env.smtp_username
if self.env.smtp_password:
email_config["smtp"]["password"] = self.env.smtp_password
return EmailConfig(**email_config)
@property
def schedule(self) -> ScheduleConfig:
"""Get schedule configuration"""
return ScheduleConfig(**self._config["schedule"])
@property
def database(self) -> DatabaseConfig:
"""Get database configuration"""
return DatabaseConfig(**self._config["database"])
@property
def logging(self) -> LoggingConfig:
"""Get logging configuration"""
return LoggingConfig(**self._config["logging"])
# Global config instance (lazy loaded)
_config: Config | None = None
def get_config() -> Config:
"""Get or create global config instance"""
global _config
if _config is None:
_config = Config()
return _config
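
Config expects a config.yaml whose top-level sections mirror the properties above, plus an OPENROUTER_API_KEY in .env or the environment. A minimal sketch (every value is illustrative; sections whose fields all have defaults can stay empty):

sources:
  rss:
    - name: "Example Feed"
      url: "https://example.com/rss"
      category: "tech"
ai:
  filtering: {}  # min_score 6.5, max_articles 15 by default
  interests:
    - "AI and machine learning"
email:
  to: "you@example.com"
  from: "news-agent@example.com"
  smtp: {}  # localhost:25, no auth, by default
schedule: {}
database: {}
logging: {}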

1
src/email/__init__.py Normal file

@@ -0,0 +1 @@
"""Email generation and sending"""

114
src/email/generator.py Normal file

@@ -0,0 +1,114 @@
"""Email HTML generation from digest data"""
from datetime import datetime
from pathlib import Path
from collections import defaultdict
from jinja2 import Environment, FileSystemLoader
from premailer import transform
from ..storage.models import DigestEntry
from ..logger import get_logger
logger = get_logger()
class EmailGenerator:
"""Generate HTML emails from digest data"""
def __init__(self):
# Set up Jinja2 template environment
template_dir = Path(__file__).parent / "templates"
self.env = Environment(loader=FileSystemLoader(template_dir))
def generate_digest_email(
self, entries: list[DigestEntry], date_str: str, subject: str
) -> tuple[str, str]:
"""
Generate HTML email for daily digest
Args:
entries: List of digest entries (articles with summaries)
date_str: Date string for the digest
subject: Email subject line
Returns:
Tuple of (html_content, text_content)
"""
# Group articles by category
articles_by_category = defaultdict(list)
for entry in entries:
articles_by_category[entry.category].append(entry)
# Sort categories
sorted_categories = sorted(articles_by_category.keys())
# Get unique sources count
unique_sources = len(set(entry.article.source for entry in entries))
# Prepare template data
template_data = {
"title": subject,
"date": date_str,
"total_articles": len(entries),
"total_sources": unique_sources,
"total_categories": len(sorted_categories),
"articles_by_category": {cat: articles_by_category[cat] for cat in sorted_categories},
}
# Render HTML template
template = self.env.get_template("daily_digest.html")
html = template.render(**template_data)
# Inline CSS for email compatibility
html_inlined = transform(html)
# Generate plain text version
text = self._generate_text_version(entries, date_str, subject)
logger.info(f"Generated email with {len(entries)} articles")
return html_inlined, text
def _generate_text_version(
self, entries: list[DigestEntry], date_str: str, subject: str
) -> str:
"""Generate plain text version of email"""
lines = [
subject,
"=" * len(subject),
"",
f"Date: {date_str}",
f"Total Articles: {len(entries)}",
"",
"",
]
# Group by category
articles_by_category = defaultdict(list)
for entry in entries:
articles_by_category[entry.category].append(entry)
# Output each category
for category in sorted(articles_by_category.keys()):
lines.append(f"{category.upper()}")
lines.append("-" * len(category))
lines.append("")
for entry in articles_by_category[category]:
article = entry.article
lines.append(f"{article.title}")
lines.append(f" Source: {article.source}")
lines.append(f" Published: {article.published.strftime('%B %d, %Y at %H:%M')}")
lines.append(f" Relevance: {entry.relevance_score:.1f}/10")
lines.append(f" URL: {article.url}")
lines.append(f" Summary: {entry.ai_summary}")
lines.append("")
lines.append("")
lines.append("")
lines.append("---")
lines.append("Generated by News Agent | Powered by OpenRouter AI")
return "\n".join(lines)

77
src/email/sender.py Normal file

@@ -0,0 +1,77 @@
"""Email sending via SMTP"""
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.utils import formatdate
from ..config import get_config
from ..logger import get_logger
logger = get_logger()
class EmailSender:
"""Send emails via SMTP"""
def __init__(self):
config = get_config()
self.config = config.email
def send(self, subject: str, html_content: str, text_content: str) -> bool:
"""
Send email with HTML and plain text versions
Args:
subject: Email subject line
html_content: HTML email body
text_content: Plain text email body
Returns:
True if sent successfully, False otherwise
"""
try:
# Create message
msg = MIMEMultipart("alternative")
msg["Subject"] = subject
msg["From"] = f"{self.config.from_name} <{self.config.from_}>"
msg["To"] = self.config.to
msg["Date"] = formatdate(localtime=True)
# Attach parts
part_text = MIMEText(text_content, "plain", "utf-8")
part_html = MIMEText(html_content, "html", "utf-8")
msg.attach(part_text)
msg.attach(part_html)
# Send via SMTP
smtp_config = self.config.smtp
if smtp_config.use_ssl:
server = smtplib.SMTP_SSL(smtp_config.host, smtp_config.port)
else:
server = smtplib.SMTP(smtp_config.host, smtp_config.port)
try:
if smtp_config.use_tls and not smtp_config.use_ssl:
server.starttls()
# Login if credentials provided
if smtp_config.username and smtp_config.password:
server.login(smtp_config.username, smtp_config.password)
# Send email
server.send_message(msg)
logger.info(f"Email sent successfully to {self.config.to}")
return True
finally:
server.quit()
except smtplib.SMTPException as e:
logger.error(f"SMTP error sending email: {e}")
return False
except Exception as e:
logger.error(f"Unexpected error sending email: {e}")
return False

194
src/email/templates/daily_digest.html Normal file

@@ -0,0 +1,194 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{{ title }}</title>
<style>
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
line-height: 1.6;
color: #333;
max-width: 800px;
margin: 0 auto;
padding: 20px;
background-color: #f5f5f5;
}
.container {
background-color: #ffffff;
border-radius: 8px;
padding: 30px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.header {
border-bottom: 3px solid #2563eb;
padding-bottom: 20px;
margin-bottom: 30px;
}
h1 {
color: #1e40af;
margin: 0 0 10px 0;
font-size: 28px;
}
.date {
color: #6b7280;
font-size: 14px;
}
.summary {
background-color: #eff6ff;
border-left: 4px solid #2563eb;
padding: 15px;
margin-bottom: 30px;
border-radius: 4px;
}
.summary p {
margin: 5px 0;
font-size: 14px;
}
.category-section {
margin-bottom: 40px;
}
.category-header {
background-color: #f3f4f6;
padding: 10px 15px;
border-radius: 4px;
margin-bottom: 20px;
}
.category-title {
color: #374151;
font-size: 20px;
font-weight: 600;
margin: 0;
text-transform: capitalize;
}
.article {
margin-bottom: 30px;
padding-bottom: 25px;
border-bottom: 1px solid #e5e7eb;
}
.article:last-child {
border-bottom: none;
}
.article-header {
margin-bottom: 10px;
}
.article-title {
font-size: 18px;
font-weight: 600;
color: #1e40af;
text-decoration: none;
line-height: 1.4;
}
.article-title:hover {
color: #2563eb;
text-decoration: underline;
}
.article-meta {
font-size: 13px;
color: #6b7280;
margin: 8px 0;
}
.article-meta span {
margin-right: 15px;
}
.score-badge {
display: inline-block;
background-color: #dcfce7;
color: #166534;
padding: 3px 8px;
border-radius: 12px;
font-size: 12px;
font-weight: 600;
}
.article-summary {
color: #374151;
font-size: 15px;
line-height: 1.6;
margin: 12px 0;
}
.read-more {
display: inline-block;
color: #2563eb;
text-decoration: none;
font-size: 14px;
font-weight: 500;
margin-top: 8px;
}
.read-more:hover {
text-decoration: underline;
}
.footer {
margin-top: 40px;
padding-top: 20px;
border-top: 2px solid #e5e7eb;
text-align: center;
color: #6b7280;
font-size: 13px;
}
@media (max-width: 600px) {
body {
padding: 10px;
}
.container {
padding: 20px;
}
h1 {
font-size: 24px;
}
.article-title {
font-size: 16px;
}
}
</style>
</head>
<body>
<div class="container">
<div class="header">
<h1>{{ title }}</h1>
<div class="date">{{ date }}</div>
</div>
<div class="summary">
<p><strong>{{ total_articles }}</strong> articles curated from your personalized news sources</p>
<p><strong>{{ total_sources }}</strong> sources | <strong>{{ total_categories }}</strong> categories</p>
</div>
{% for category, articles in articles_by_category.items() %}
<div class="category-section">
<div class="category-header">
<h2 class="category-title">{{ category }}</h2>
</div>
{% for entry in articles %}
<article class="article">
<div class="article-header">
<a href="{{ entry.article.url }}" class="article-title" target="_blank" rel="noopener">
{{ entry.article.title }}
</a>
</div>
<div class="article-meta">
<span>{{ entry.article.source }}</span>
<span>{{ entry.article.published.strftime('%B %d, %Y at %H:%M') }}</span>
<span class="score-badge">Relevance: {{ "%.1f"|format(entry.relevance_score) }}/10</span>
</div>
<div class="article-summary">
{{ entry.ai_summary }}
</div>
<a href="{{ entry.article.url }}" class="read-more" target="_blank" rel="noopener">
Read full article →
</a>
</article>
{% endfor %}
</div>
{% endfor %}
<div class="footer">
<p>Generated by News Agent | Powered by OpenRouter AI</p>
<p>You received this because you subscribed to daily tech news digests</p>
</div>
</div>
</body>
</html>

54
src/logger.py Normal file

@@ -0,0 +1,54 @@
"""Logging configuration for News Agent"""
import logging
import sys
from logging.handlers import RotatingFileHandler
from pathlib import Path
from .config import get_config
def setup_logger(name: str = "news-agent") -> logging.Logger:
"""Setup and configure logger with file and console handlers"""
config = get_config()
log_config = config.logging
# Create logger
logger = logging.getLogger(name)
logger.setLevel(getattr(logging, log_config.level.upper()))
# Avoid duplicate handlers
if logger.handlers:
return logger
# Create formatters
detailed_formatter = logging.Formatter(
"%(asctime)s - %(name)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
)
simple_formatter = logging.Formatter("%(levelname)s - %(message)s")
# File handler with rotation
log_file = Path(log_config.file)
log_file.parent.mkdir(parents=True, exist_ok=True)
file_handler = RotatingFileHandler(
log_file, maxBytes=log_config.max_bytes, backupCount=log_config.backup_count
)
file_handler.setLevel(logging.DEBUG)
file_handler.setFormatter(detailed_formatter)
# Console handler
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setLevel(logging.INFO)
console_handler.setFormatter(simple_formatter)
# Add handlers
logger.addHandler(file_handler)
logger.addHandler(console_handler)
return logger
def get_logger(name: str = "news-agent") -> logging.Logger:
"""Get or create logger instance"""
return logging.getLogger(name)

153
src/main.py Normal file

@@ -0,0 +1,153 @@
"""Main orchestrator for News Agent"""
import asyncio
from datetime import datetime
from .config import get_config
from .logger import setup_logger, get_logger
from .storage.database import Database
from .storage.models import DigestEntry
from .aggregator.rss_fetcher import RSSFetcher
from .ai.client import OpenRouterClient
from .ai.filter import ArticleFilter
from .ai.summarizer import ArticleSummarizer
from .email.generator import EmailGenerator
from .email.sender import EmailSender
async def main():
"""Main execution flow"""
# Setup logging
setup_logger()
logger = get_logger()
logger.info("=" * 60)
logger.info("News Agent starting...")
logger.info("=" * 60)
try:
# Load configuration
config = get_config()
# Initialize database
db = Database(config.database.path)
await db.initialize()
# Clean up old articles
await db.cleanup_old_articles(config.database.retention_days)
# Initialize RSS fetcher
fetcher = RSSFetcher()
# Fetch articles from all sources
logger.info(f"Fetching from {len(config.rss_sources)} RSS sources...")
articles = await fetcher.fetch_all(config.rss_sources)
if not articles:
logger.warning("No articles fetched from any source")
await fetcher.close()
return
# Save articles to database (deduplication)
new_articles_count = await db.save_articles(articles)
logger.info(
f"Saved {new_articles_count} new articles (filtered {len(articles) - new_articles_count} duplicates)"
)
await fetcher.close()
# Get unprocessed articles
unprocessed = await db.get_unprocessed_articles()
if not unprocessed:
logger.info("No new articles to process")
return
logger.info(f"Processing {len(unprocessed)} new articles with AI...")
# Initialize AI components
ai_client = OpenRouterClient()
filter_ai = ArticleFilter(ai_client)
summarizer = ArticleSummarizer(ai_client)
# Filter articles by relevance
logger.info("Filtering articles by relevance...")
filtered_articles = await filter_ai.filter_articles(
unprocessed, max_articles=config.ai.filtering.max_articles
)
if not filtered_articles:
logger.warning("No relevant articles found after filtering")
# Mark all as processed but not included
for article in unprocessed:
await db.update_article_processing(
article.id, relevance_score=0.0, ai_summary="", included=False
)
return
logger.info(f"Selected {len(filtered_articles)} relevant articles")
# Summarize filtered articles
logger.info("Generating AI summaries...")
digest_entries = []
for article, score in filtered_articles:
summary = await summarizer.summarize(article)
# Update database
await db.update_article_processing(
article.id, relevance_score=score, ai_summary=summary, included=True
)
# Create digest entry
entry = DigestEntry(
article=article,
relevance_score=score,
ai_summary=summary,
category=article.category,
)
digest_entries.append(entry)
# Mark remaining unprocessed articles as processed but not included
processed_ids = {article.id for article, _ in filtered_articles}
for article in unprocessed:
if article.id not in processed_ids:
await db.update_article_processing(
article.id, relevance_score=0.0, ai_summary="", included=False
)
# Generate email
logger.info("Generating email digest...")
generator = EmailGenerator()
date_str = datetime.now().strftime("%A, %B %d, %Y")
subject = config.email.subject_template.format(date=date_str)
html_content, text_content = generator.generate_digest_email(
digest_entries, date_str, subject
)
# Send email
logger.info("Sending email...")
sender = EmailSender()
success = sender.send(subject, html_content, text_content)
if success:
logger.info("=" * 60)
logger.info(f"Daily digest sent successfully with {len(digest_entries)} articles!")
logger.info("=" * 60)
else:
logger.error("Failed to send email")
except Exception as e:
logger.error(f"Fatal error in main execution: {e}", exc_info=True)
raise
def run():
"""Entry point for command-line execution"""
asyncio.run(main())
if __name__ == "__main__":
run()
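
Because every module uses package-relative imports, the orchestrator must be started as a module from the repository root, i.e. python -m src.main; invoking python src/main.py directly would fail on the relative imports.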

1
src/storage/__init__.py Normal file

@@ -0,0 +1 @@
"""Database storage for articles"""

194
src/storage/database.py Normal file

@@ -0,0 +1,194 @@
"""SQLite database operations for article storage and deduplication"""
import aiosqlite
from datetime import datetime, timedelta
from pathlib import Path
from typing import Optional
from .models import Article
from ..logger import get_logger
logger = get_logger()
class Database:
"""Async SQLite database manager"""
def __init__(self, db_path: str | Path):
self.db_path = Path(db_path)
self.db_path.parent.mkdir(parents=True, exist_ok=True)
async def initialize(self):
"""Create database tables if they don't exist"""
async with aiosqlite.connect(self.db_path) as db:
await db.execute(
"""
CREATE TABLE IF NOT EXISTS articles (
id TEXT PRIMARY KEY,
url TEXT NOT NULL UNIQUE,
title TEXT NOT NULL,
summary TEXT,
content TEXT NOT NULL,
published TEXT NOT NULL,
source TEXT NOT NULL,
category TEXT NOT NULL,
fetched_at TEXT NOT NULL,
relevance_score REAL,
ai_summary TEXT,
processed INTEGER DEFAULT 0,
included_in_digest INTEGER DEFAULT 0
)
"""
)
await db.execute(
"""
CREATE INDEX IF NOT EXISTS idx_published
ON articles(published)
"""
)
await db.execute(
"""
CREATE INDEX IF NOT EXISTS idx_fetched_at
ON articles(fetched_at)
"""
)
await db.commit()
logger.info(f"Database initialized at {self.db_path}")
async def article_exists(self, article_id: str) -> bool:
"""Check if article already exists in database"""
async with aiosqlite.connect(self.db_path) as db:
async with db.execute("SELECT 1 FROM articles WHERE id = ?", (article_id,)) as cursor:
result = await cursor.fetchone()
return result is not None
async def save_article(self, article: Article) -> bool:
"""Save article to database. Returns True if saved, False if duplicate"""
if await self.article_exists(article.id):
logger.debug(f"Article already exists: {article.title}")
return False
async with aiosqlite.connect(self.db_path) as db:
await db.execute(
"""
INSERT INTO articles (
id, url, title, summary, content, published, source,
category, fetched_at, relevance_score, ai_summary,
processed, included_in_digest
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
(
article.id,
str(article.url),
article.title,
article.summary,
article.content,
article.published.isoformat(),
article.source,
article.category,
article.fetched_at.isoformat(),
article.relevance_score,
article.ai_summary,
int(article.processed),
int(article.included_in_digest),
),
)
await db.commit()
logger.debug(f"Saved article: {article.title}")
return True
async def save_articles(self, articles: list[Article]) -> int:
"""Save multiple articles. Returns count of new articles saved"""
count = 0
for article in articles:
if await self.save_article(article):
count += 1
return count
async def get_unprocessed_articles(self, limit: Optional[int] = None) -> list[Article]:
"""Get articles that haven't been processed by AI yet"""
query = """
SELECT * FROM articles
WHERE processed = 0
ORDER BY published DESC
"""
if limit:
query += f" LIMIT {limit}"
async with aiosqlite.connect(self.db_path) as db:
db.row_factory = aiosqlite.Row
async with db.execute(query) as cursor:
rows = await cursor.fetchall()
return [self._row_to_article(row) for row in rows]
async def update_article_processing(
self, article_id: str, relevance_score: float, ai_summary: str, included: bool
):
"""Update article with AI processing results"""
async with aiosqlite.connect(self.db_path) as db:
await db.execute(
"""
UPDATE articles
SET relevance_score = ?,
ai_summary = ?,
processed = 1,
included_in_digest = ?
WHERE id = ?
""",
(relevance_score, ai_summary, int(included), article_id),
)
await db.commit()
async def get_todays_digest_articles(self) -> list[Article]:
"""Get all articles included in today's digest"""
today = datetime.now().date()
async with aiosqlite.connect(self.db_path) as db:
db.row_factory = aiosqlite.Row
async with db.execute(
"""
SELECT * FROM articles
WHERE included_in_digest = 1
AND date(fetched_at) = ?
ORDER BY relevance_score DESC, published DESC
""",
(today.isoformat(),),
) as cursor:
rows = await cursor.fetchall()
return [self._row_to_article(row) for row in rows]
async def cleanup_old_articles(self, retention_days: int):
"""Delete articles older than retention period"""
cutoff_date = datetime.now() - timedelta(days=retention_days)
async with aiosqlite.connect(self.db_path) as db:
cursor = await db.execute(
"DELETE FROM articles WHERE fetched_at < ?", (cutoff_date.isoformat(),)
)
deleted = cursor.rowcount
await db.commit()
if deleted > 0:
logger.info(f"Cleaned up {deleted} old articles")
def _row_to_article(self, row: aiosqlite.Row) -> Article:
"""Convert database row to Article model"""
return Article(
id=row["id"],
url=row["url"],
title=row["title"],
summary=row["summary"],
content=row["content"],
published=datetime.fromisoformat(row["published"]),
source=row["source"],
category=row["category"],
fetched_at=datetime.fromisoformat(row["fetched_at"]),
relevance_score=row["relevance_score"],
ai_summary=row["ai_summary"],
processed=bool(row["processed"]),
included_in_digest=bool(row["included_in_digest"]),
)
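
A short end-to-end sketch of the Database API above, with illustrative field values (Article comes from .models, as imported at the top of this file):

import asyncio
from datetime import datetime, timezone

async def demo() -> None:
    db = Database("data/articles.db")
    await db.initialize()
    article = Article(
        id="abc123",
        url="https://example.com/post",
        title="Example post",
        content="Body text",
        published=datetime.now(timezone.utc),
        source="Example Feed",
        category="tech",
        fetched_at=datetime.now(timezone.utc),
    )
    print(await db.save_article(article))  # True first time, False on rerun
    print(len(await db.get_unprocessed_articles()))

asyncio.run(demo())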

44
src/storage/models.py Normal file

@@ -0,0 +1,44 @@
"""Data models for articles and digests"""
from datetime import datetime
from typing import Optional
from pydantic import BaseModel, HttpUrl
class Article(BaseModel):
"""Article data model"""
id: str # Hash of URL
url: HttpUrl
title: str
summary: Optional[str] = None
content: str
published: datetime
source: str
category: str
fetched_at: datetime
# AI processing fields
relevance_score: Optional[float] = None
ai_summary: Optional[str] = None
processed: bool = False
included_in_digest: bool = False
class DigestEntry(BaseModel):
"""Entry in daily digest"""
article: Article
relevance_score: float
ai_summary: str
category: str
class DailyDigest(BaseModel):
"""Complete daily digest"""
date: str
entries: list[DigestEntry]
total_articles_processed: int
total_articles_included: int

1
src/utils/__init__.py Normal file

@@ -0,0 +1 @@
"""Utility functions"""