first commit
3 src/__init__.py Normal file
@@ -0,0 +1,3 @@
"""News Agent - AI-powered daily tech news aggregator"""

__version__ = "0.1.0"
1 src/aggregator/__init__.py Normal file
@@ -0,0 +1 @@
"""News aggregation from various sources"""
162 src/aggregator/rss_fetcher.py Normal file
@@ -0,0 +1,162 @@
"""RSS feed fetching and parsing"""

import feedparser
import httpx
from datetime import datetime, timedelta, timezone
from hashlib import sha256
from typing import Optional
from email.utils import parsedate_to_datetime

from ..config import RSSSource
from ..storage.models import Article
from ..logger import get_logger

logger = get_logger()


class RSSFetcher:
    """Fetch and parse RSS feeds"""

    def __init__(self, timeout: int = 30, hours_lookback: int = 24):
        """
        Initialize RSS fetcher

        Args:
            timeout: HTTP request timeout in seconds
            hours_lookback: How many hours back to fetch articles
        """
        self.timeout = timeout
        self.hours_lookback = hours_lookback
        self.client = httpx.AsyncClient(
            timeout=timeout,
            follow_redirects=True,
            headers={"User-Agent": "NewsAgent/1.0 RSS Reader"},
        )

    async def close(self):
        """Close HTTP client"""
        await self.client.aclose()

    async def fetch(self, source: RSSSource) -> list[Article]:
        """
        Fetch and parse articles from RSS feed

        Args:
            source: RSS source configuration

        Returns:
            List of Article objects from the feed
        """
        try:
            logger.info(f"Fetching RSS feed: {source.name}")
            response = await self.client.get(str(source.url))
            response.raise_for_status()

            # Parse feed
            feed = feedparser.parse(response.text)

            if feed.bozo:
                logger.warning(f"Feed parsing warning for {source.name}: {feed.bozo_exception}")

            articles = []
            cutoff_time = datetime.now(timezone.utc) - timedelta(hours=self.hours_lookback)

            for entry in feed.entries:
                try:
                    article = self._parse_entry(entry, source)
                    if article and article.published >= cutoff_time:
                        articles.append(article)
                except Exception as e:
                    logger.warning(f"Failed to parse entry from {source.name}: {e}")
                    continue

            logger.info(f"Fetched {len(articles)} articles from {source.name}")
            return articles

        except httpx.HTTPError as e:
            logger.error(f"HTTP error fetching {source.name}: {e}")
            return []
        except Exception as e:
            logger.error(f"Unexpected error fetching {source.name}: {e}")
            return []

    def _parse_entry(
        self, entry: feedparser.FeedParserDict, source: RSSSource
    ) -> Optional[Article]:
        """Parse a single RSS entry into an Article"""
        # Get URL
        url = entry.get("link")
        if not url:
            return None

        # Generate unique ID from URL
        article_id = sha256(url.encode()).hexdigest()

        # Get title
        title = entry.get("title", "Untitled")

        # Get published date
        published = self._parse_date(entry)
        if not published:
            published = datetime.now(timezone.utc)

        # Get summary/content
        summary = entry.get("summary", None)
        content = entry.get("content", [{}])[0].get("value", entry.get("description", title))

        # Fall back to the summary when no usable content was found
        if content == title:
            content = summary or title

        return Article(
            id=article_id,
            url=url,
            title=title,
            summary=summary,
            content=content,
            published=published,
            source=source.name,
            category=source.category,
            fetched_at=datetime.now(timezone.utc),
        )

    def _parse_date(self, entry: feedparser.FeedParserDict) -> Optional[datetime]:
        """Parse published date from entry"""
        # Try the pre-parsed time tuples first
        for date_field in ["published_parsed", "updated_parsed", "created_parsed"]:
            if date_field in entry and entry[date_field]:
                try:
                    time_tuple = entry[date_field]
                    dt = datetime(*time_tuple[:6], tzinfo=timezone.utc)
                    return dt
                except (TypeError, ValueError):
                    continue

        # Fall back to parsing the raw date strings
        for date_field in ["published", "updated", "created"]:
            if date_field in entry and entry[date_field]:
                try:
                    return parsedate_to_datetime(entry[date_field])
                except (TypeError, ValueError):
                    continue

        return None

    async def fetch_all(self, sources: list[RSSSource]) -> list[Article]:
        """
        Fetch articles from multiple RSS sources, one source at a time

        Args:
            sources: List of RSS sources to fetch

        Returns:
            Combined list of articles from all sources
        """
        all_articles = []

        for source in sources:
            articles = await self.fetch(source)
            all_articles.extend(articles)

        logger.info(f"Total articles fetched from all sources: {len(all_articles)}")
        return all_articles
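A minimal usage sketch of the fetcher above (not part of this commit); the source name and feed URL are made-up placeholders, and it assumes the package is importable as src from the repository root.

# Sketch only: drive RSSFetcher directly against one hypothetical source.
import asyncio

from src.aggregator.rss_fetcher import RSSFetcher
from src.config import RSSSource


async def demo():
    fetcher = RSSFetcher(timeout=15, hours_lookback=48)
    source = RSSSource(
        name="Example Feed",                # placeholder name
        url="https://example.com/rss.xml",  # placeholder URL
        category="general",
    )
    try:
        articles = await fetcher.fetch_all([source])
        for article in articles:
            print(article.published, article.title)
    finally:
        await fetcher.close()


asyncio.run(demo())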
1 src/ai/__init__.py Normal file
@@ -0,0 +1 @@
"""AI processing using OpenRouter"""
117 src/ai/client.py Normal file
@@ -0,0 +1,117 @@
"""OpenRouter API client"""

import json
from typing import Any

from openai import AsyncOpenAI

from ..config import get_config
from ..logger import get_logger

logger = get_logger()


class OpenRouterClient:
    """Async client for OpenRouter API"""

    def __init__(self):
        config = get_config()
        env = config.env

        self.client = AsyncOpenAI(
            base_url=config.ai.base_url,
            api_key=env.openrouter_api_key,
            default_headers={
                **({"HTTP-Referer": env.openrouter_site_url} if env.openrouter_site_url else {}),
                **({"X-Title": env.openrouter_site_name} if env.openrouter_site_name else {}),
            },
        )

        self.model = config.ai.model
        logger.info(f"Initialized OpenRouter client with model: {self.model}")

    async def chat_completion(
        self,
        system_prompt: str,
        user_prompt: str,
        temperature: float = 0.7,
        max_tokens: int = 1000,
        json_mode: bool = False,
    ) -> str:
        """
        Send chat completion request

        Args:
            system_prompt: System instruction
            user_prompt: User message
            temperature: Sampling temperature (0-2)
            max_tokens: Maximum tokens to generate
            json_mode: Enable JSON response format

        Returns:
            Response content as string
        """
        try:
            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ]

            kwargs: dict[str, Any] = {
                "model": self.model,
                "messages": messages,
                "temperature": temperature,
                "max_tokens": max_tokens,
            }

            if json_mode:
                kwargs["response_format"] = {"type": "json_object"}

            response = await self.client.chat.completions.create(**kwargs)

            content = response.choices[0].message.content
            if not content:
                raise ValueError("Empty response from API")

            # Log token usage
            if response.usage:
                logger.debug(
                    f"Tokens used - Prompt: {response.usage.prompt_tokens}, "
                    f"Completion: {response.usage.completion_tokens}, "
                    f"Total: {response.usage.total_tokens}"
                )

            return content

        except Exception as e:
            logger.error(f"OpenRouter API error: {e}")
            raise

    async def chat_completion_json(
        self, system_prompt: str, user_prompt: str, temperature: float = 0.3, max_tokens: int = 500
    ) -> dict[str, Any]:
        """
        Send chat completion request expecting JSON response

        Args:
            system_prompt: System instruction
            user_prompt: User message
            temperature: Sampling temperature
            max_tokens: Maximum tokens to generate

        Returns:
            Parsed JSON response as dictionary
        """
        content = await self.chat_completion(
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            temperature=temperature,
            max_tokens=max_tokens,
            json_mode=True,
        )

        try:
            return json.loads(content)
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse JSON response: {content}")
            raise ValueError(f"Invalid JSON response: {e}")
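A rough sketch of calling the client above outside the pipeline (not part of this commit); it assumes config.yaml exists and OPENROUTER_API_KEY is set in .env, and the prompts are purely illustrative.

# Sketch only: one JSON-mode call through OpenRouterClient.
import asyncio

from src.ai.client import OpenRouterClient


async def demo():
    client = OpenRouterClient()
    result = await client.chat_completion_json(
        system_prompt='Reply with JSON: {"score": <float>, "reason": "<text>"}',
        user_prompt="Rate how newsworthy a new Python release is.",
    )
    print(result.get("score"), result.get("reason"))


asyncio.run(demo())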
118 src/ai/filter.py Normal file
@@ -0,0 +1,118 @@
"""Article relevance filtering using AI"""

from typing import Optional

from ..storage.models import Article
from ..config import get_config
from ..logger import get_logger
from .client import OpenRouterClient
from .prompts import FILTERING_SYSTEM_PROMPT, FILTERING_USER_PROMPT

logger = get_logger()


class ArticleFilter:
    """Filter articles by relevance using AI"""

    def __init__(self, client: OpenRouterClient):
        self.client = client
        config = get_config()
        self.interests = config.ai.interests
        self.min_score = config.ai.filtering.min_score

    async def score_article(self, article: Article) -> Optional[float]:
        """
        Score article relevance (0-10)

        Args:
            article: Article to score

        Returns:
            Relevance score or None if scoring fails
        """
        try:
            # Prepare prompts
            system_prompt = FILTERING_SYSTEM_PROMPT.format(
                interests="\n".join(f"- {interest}" for interest in self.interests)
            )

            # Truncate content for API call
            content_preview = article.content[:500] + ("..." if len(article.content) > 500 else "")

            user_prompt = FILTERING_USER_PROMPT.format(
                title=article.title,
                source=article.source,
                category=article.category,
                content=content_preview,
            )

            # Get score from AI
            response = await self.client.chat_completion_json(
                system_prompt=system_prompt,
                user_prompt=user_prompt,
                temperature=0.3,
                max_tokens=200,
            )

            score = float(response.get("score", 0))
            reason = response.get("reason", "No reason provided")

            logger.debug(f"Article '{article.title}' scored {score:.1f}: {reason}")

            return score

        except Exception as e:
            logger.error(f"Failed to score article '{article.title}': {e}")
            return None

    async def is_relevant(self, article: Article) -> tuple[bool, Optional[float]]:
        """
        Check if article meets relevance threshold

        Args:
            article: Article to check

        Returns:
            Tuple of (is_relevant, score)
        """
        score = await self.score_article(article)

        if score is None:
            return False, None

        is_relevant = score >= self.min_score
        return is_relevant, score

    async def filter_articles(
        self, articles: list[Article], max_articles: Optional[int] = None
    ) -> list[tuple[Article, float]]:
        """
        Filter and rank articles by relevance

        Args:
            articles: Articles to filter
            max_articles: Maximum number of articles to return

        Returns:
            List of (article, score) tuples, sorted by score descending
        """
        scored_articles: list[tuple[Article, float]] = []

        for article in articles:
            is_relevant, score = await self.is_relevant(article)

            if is_relevant and score is not None:
                scored_articles.append((article, score))

        # Sort by score descending
        scored_articles.sort(key=lambda x: x[1], reverse=True)

        # Apply limit if specified
        if max_articles:
            scored_articles = scored_articles[:max_articles]

        logger.info(
            f"Filtered {len(articles)} articles down to {len(scored_articles)} relevant ones"
        )

        return scored_articles
60 src/ai/prompts.py Normal file
@@ -0,0 +1,60 @@
"""Prompt templates for AI processing"""

FILTERING_SYSTEM_PROMPT = """You are a news relevance analyzer. Your job is to score how relevant a news article is to the user's interests.

User Interests:
{interests}

Score the article on a scale of 0-10 based on:
- Direct relevance to stated interests (0-4 points)
- Quality and depth of content (0-3 points)
- Timeliness and importance (0-3 points)

Return ONLY a JSON object with this exact format:
{{"score": <float>, "reason": "<brief explanation>"}}

Be strict - only highly relevant articles should score above 7.0."""


FILTERING_USER_PROMPT = """Article Title: {title}

Source: {source}

Category: {category}

Content Preview: {content}

Score this article's relevance (0-10) and explain why."""


SUMMARIZATION_SYSTEM_PROMPT = """You are a technical news summarizer. Create concise, informative summaries of tech articles.

Guidelines:
- Focus on key facts, findings, and implications
- Include important technical details
- Keep summaries to 2-3 sentences
- Use clear, professional language
- Highlight what makes this newsworthy

Return ONLY the summary text, no additional formatting."""


SUMMARIZATION_USER_PROMPT = """Title: {title}

Source: {source}

Content: {content}

Write a concise 2-3 sentence summary highlighting the key information."""


BATCH_SUMMARIZATION_SYSTEM_PROMPT = """You are a technical news summarizer. Create concise summaries for multiple articles.

For each article, provide a 2-3 sentence summary that:
- Captures the key facts and findings
- Highlights technical details
- Explains why it's newsworthy

Return a JSON array with this exact format:
[
    {{"id": "<article_id>", "summary": "<2-3 sentence summary>"}},
    ...
]"""
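One detail these templates rely on (noted here, not part of the commit): the doubled braces are how str.format() emits literal { and }, so the JSON skeletons survive formatting while {interests}, {title}, and the other single-braced fields are substituted.

# Sketch only: doubled braces come out of str.format() as literal braces.
template = 'Interests:\n{interests}\nReply as {{"score": <float>}}'
print(template.format(interests="- Python"))
# Interests:
# - Python
# Reply as {"score": <float>}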
72 src/ai/summarizer.py Normal file
@@ -0,0 +1,72 @@
"""Article summarization using AI"""

from ..storage.models import Article
from ..logger import get_logger
from .client import OpenRouterClient
from .prompts import SUMMARIZATION_SYSTEM_PROMPT, SUMMARIZATION_USER_PROMPT

logger = get_logger()


class ArticleSummarizer:
    """Summarize articles using AI"""

    def __init__(self, client: OpenRouterClient):
        self.client = client

    async def summarize(self, article: Article) -> str:
        """
        Generate concise summary of article

        Args:
            article: Article to summarize

        Returns:
            Summary text (2-3 sentences)
        """
        try:
            # Truncate content if too long
            max_content_length = 2000
            content = article.content
            if len(content) > max_content_length:
                content = content[:max_content_length] + "..."

            user_prompt = SUMMARIZATION_USER_PROMPT.format(
                title=article.title, source=article.source, content=content
            )

            summary = await self.client.chat_completion(
                system_prompt=SUMMARIZATION_SYSTEM_PROMPT,
                user_prompt=user_prompt,
                temperature=0.5,
                max_tokens=300,
            )

            logger.debug(f"Summarized article: {article.title}")
            return summary.strip()

        except Exception as e:
            logger.error(f"Failed to summarize article '{article.title}': {e}")
            # Fall back to the original summary or truncated content
            if article.summary:
                return article.summary
            return article.content[:200] + "..."

    async def summarize_batch(self, articles: list[Article]) -> dict[str, str]:
        """
        Summarize multiple articles

        Args:
            articles: List of articles to summarize

        Returns:
            Dictionary mapping article IDs to summaries
        """
        summaries = {}

        for article in articles:
            summary = await self.summarize(article)
            summaries[article.id] = summary

        logger.info(f"Summarized {len(summaries)} articles")
        return summaries
156 src/config.py Normal file
@@ -0,0 +1,156 @@
"""Configuration management for News Agent"""

import yaml
from pathlib import Path
from typing import Any

from pydantic import BaseModel, Field, HttpUrl
from pydantic_settings import BaseSettings, SettingsConfigDict


class RSSSource(BaseModel):
    """RSS feed source configuration"""

    name: str
    url: HttpUrl
    category: str


class AIFilteringConfig(BaseModel):
    """AI filtering configuration"""

    enabled: bool = True
    min_score: float = Field(ge=0, le=10, default=6.5)
    max_articles: int = Field(ge=1, default=15)


class AIConfig(BaseModel):
    """AI processing configuration"""

    provider: str = "openrouter"
    base_url: str = "https://openrouter.ai/api/v1"
    model: str = "google/gemini-flash-1.5"
    filtering: AIFilteringConfig
    interests: list[str]


class SMTPConfig(BaseModel):
    """SMTP server configuration"""

    host: str = "localhost"
    port: int = 25
    use_tls: bool = False
    use_ssl: bool = False
    username: str | None = None
    password: str | None = None


class EmailConfig(BaseModel):
    """Email configuration"""

    to: str
    from_: str = Field(alias="from")
    from_name: str = "Daily Tech News Agent"
    subject_template: str = "Tech News Digest - {date}"
    smtp: SMTPConfig


class ScheduleConfig(BaseModel):
    """Schedule configuration"""

    time: str = "07:00"
    timezone: str = "Europe/Oslo"


class DatabaseConfig(BaseModel):
    """Database configuration"""

    path: str = "data/articles.db"
    retention_days: int = 30


class LoggingConfig(BaseModel):
    """Logging configuration"""

    level: str = "INFO"
    file: str = "data/logs/news-agent.log"
    max_bytes: int = 10485760
    backup_count: int = 5


class EnvSettings(BaseSettings):
    """Environment variable settings"""

    model_config = SettingsConfigDict(env_file=".env", case_sensitive=False, extra="ignore")

    openrouter_api_key: str
    openrouter_site_url: str | None = None
    openrouter_site_name: str | None = None
    smtp_username: str | None = None
    smtp_password: str | None = None
    error_notification_email: str | None = None


class Config:
    """Main configuration manager"""

    def __init__(self, config_path: str | Path = "config.yaml"):
        """Load configuration from YAML file and environment variables"""
        self.config_path = Path(config_path)

        # Load YAML configuration
        with open(self.config_path) as f:
            self._config: dict[str, Any] = yaml.safe_load(f)

        # Load environment variables
        self.env = EnvSettings()

    @property
    def rss_sources(self) -> list[RSSSource]:
        """Get all RSS sources"""
        return [RSSSource(**src) for src in self._config["sources"]["rss"]]

    @property
    def ai(self) -> AIConfig:
        """Get AI configuration"""
        return AIConfig(**self._config["ai"])

    @property
    def email(self) -> EmailConfig:
        """Get email configuration"""
        email_config = self._config["email"].copy()

        # Merge SMTP credentials from environment variables
        if self.env.smtp_username:
            email_config["smtp"]["username"] = self.env.smtp_username
        if self.env.smtp_password:
            email_config["smtp"]["password"] = self.env.smtp_password

        return EmailConfig(**email_config)

    @property
    def schedule(self) -> ScheduleConfig:
        """Get schedule configuration"""
        return ScheduleConfig(**self._config["schedule"])

    @property
    def database(self) -> DatabaseConfig:
        """Get database configuration"""
        return DatabaseConfig(**self._config["database"])

    @property
    def logging(self) -> LoggingConfig:
        """Get logging configuration"""
        return LoggingConfig(**self._config["logging"])


# Global config instance (lazy loaded)
_config: Config | None = None


def get_config() -> Config:
    """Get or create global config instance"""
    global _config
    if _config is None:
        _config = Config()
    return _config
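The YAML shape that Config appears to expect, inferred from the property accessors above (not part of this commit); every value is a placeholder, and note that constructing Config() also needs OPENROUTER_API_KEY available to EnvSettings.

# Sketch only: write a minimal config.yaml with the keys the properties above read.
from pathlib import Path

EXAMPLE_CONFIG = """\
sources:
  rss:
    - name: Example Feed               # placeholder
      url: https://example.com/rss.xml # placeholder
      category: general
ai:
  filtering:
    min_score: 6.5
    max_articles: 15
  interests:
    - Python and developer tooling
email:
  to: you@example.com
  from: agent@example.com
  smtp:
    host: localhost
    port: 25
schedule: {}
database: {}
logging: {}
"""

Path("config.yaml").write_text(EXAMPLE_CONFIG)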
1 src/email/__init__.py Normal file
@@ -0,0 +1 @@
"""Email generation and sending"""
114 src/email/generator.py Normal file
@@ -0,0 +1,114 @@
"""Email HTML generation from digest data"""

from datetime import datetime
from pathlib import Path
from collections import defaultdict

from jinja2 import Environment, FileSystemLoader
from premailer import transform

from ..storage.models import DigestEntry
from ..logger import get_logger

logger = get_logger()


class EmailGenerator:
    """Generate HTML emails from digest data"""

    def __init__(self):
        # Set up Jinja2 template environment
        template_dir = Path(__file__).parent / "templates"
        self.env = Environment(loader=FileSystemLoader(template_dir))

    def generate_digest_email(
        self, entries: list[DigestEntry], date_str: str, subject: str
    ) -> tuple[str, str]:
        """
        Generate HTML email for daily digest

        Args:
            entries: List of digest entries (articles with summaries)
            date_str: Date string for the digest
            subject: Email subject line

        Returns:
            Tuple of (html_content, text_content)
        """
        # Group articles by category
        articles_by_category = defaultdict(list)
        for entry in entries:
            articles_by_category[entry.category].append(entry)

        # Sort categories
        sorted_categories = sorted(articles_by_category.keys())

        # Get unique sources count
        unique_sources = len(set(entry.article.source for entry in entries))

        # Prepare template data
        template_data = {
            "title": subject,
            "date": date_str,
            "total_articles": len(entries),
            "total_sources": unique_sources,
            "total_categories": len(sorted_categories),
            "articles_by_category": {cat: articles_by_category[cat] for cat in sorted_categories},
        }

        # Render HTML template
        template = self.env.get_template("daily_digest.html")
        html = template.render(**template_data)

        # Inline CSS for email compatibility
        html_inlined = transform(html)

        # Generate plain text version
        text = self._generate_text_version(entries, date_str, subject)

        logger.info(f"Generated email with {len(entries)} articles")

        return html_inlined, text

    def _generate_text_version(
        self, entries: list[DigestEntry], date_str: str, subject: str
    ) -> str:
        """Generate plain text version of email"""
        lines = [
            subject,
            "=" * len(subject),
            "",
            f"Date: {date_str}",
            f"Total Articles: {len(entries)}",
            "",
            "",
        ]

        # Group by category
        articles_by_category = defaultdict(list)
        for entry in entries:
            articles_by_category[entry.category].append(entry)

        # Output each category
        for category in sorted(articles_by_category.keys()):
            lines.append(f"{category.upper()}")
            lines.append("-" * len(category))
            lines.append("")

            for entry in articles_by_category[category]:
                article = entry.article
                lines.append(f"• {article.title}")
                lines.append(f"  Source: {article.source}")
                lines.append(f"  Published: {article.published.strftime('%B %d, %Y at %H:%M')}")
                lines.append(f"  Relevance: {entry.relevance_score:.1f}/10")
                lines.append(f"  URL: {article.url}")
                lines.append(f"  Summary: {entry.ai_summary}")
                lines.append("")

            lines.append("")

        lines.append("")
        lines.append("---")
        lines.append("Generated by News Agent | Powered by OpenRouter AI")

        return "\n".join(lines)
77 src/email/sender.py Normal file
@@ -0,0 +1,77 @@
"""Email sending via SMTP"""

import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.utils import formatdate

from ..config import get_config
from ..logger import get_logger

logger = get_logger()


class EmailSender:
    """Send emails via SMTP"""

    def __init__(self):
        config = get_config()
        self.config = config.email

    def send(self, subject: str, html_content: str, text_content: str) -> bool:
        """
        Send email with HTML and plain text versions

        Args:
            subject: Email subject line
            html_content: HTML email body
            text_content: Plain text email body

        Returns:
            True if sent successfully, False otherwise
        """
        try:
            # Create message
            msg = MIMEMultipart("alternative")
            msg["Subject"] = subject
            msg["From"] = f"{self.config.from_name} <{self.config.from_}>"
            msg["To"] = self.config.to
            msg["Date"] = formatdate(localtime=True)

            # Attach parts
            part_text = MIMEText(text_content, "plain", "utf-8")
            part_html = MIMEText(html_content, "html", "utf-8")

            msg.attach(part_text)
            msg.attach(part_html)

            # Send via SMTP
            smtp_config = self.config.smtp

            if smtp_config.use_ssl:
                server = smtplib.SMTP_SSL(smtp_config.host, smtp_config.port)
            else:
                server = smtplib.SMTP(smtp_config.host, smtp_config.port)

            try:
                if smtp_config.use_tls and not smtp_config.use_ssl:
                    server.starttls()

                # Login if credentials provided
                if smtp_config.username and smtp_config.password:
                    server.login(smtp_config.username, smtp_config.password)

                # Send email
                server.send_message(msg)
                logger.info(f"Email sent successfully to {self.config.to}")
                return True

            finally:
                server.quit()

        except smtplib.SMTPException as e:
            logger.error(f"SMTP error sending email: {e}")
            return False
        except Exception as e:
            logger.error(f"Unexpected error sending email: {e}")
            return False
194 src/email/templates/daily_digest.html Normal file
@@ -0,0 +1,194 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{{ title }}</title>
    <style>
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
            line-height: 1.6;
            color: #333;
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
            background-color: #f5f5f5;
        }
        .container {
            background-color: #ffffff;
            border-radius: 8px;
            padding: 30px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        .header {
            border-bottom: 3px solid #2563eb;
            padding-bottom: 20px;
            margin-bottom: 30px;
        }
        h1 {
            color: #1e40af;
            margin: 0 0 10px 0;
            font-size: 28px;
        }
        .date {
            color: #6b7280;
            font-size: 14px;
        }
        .summary {
            background-color: #eff6ff;
            border-left: 4px solid #2563eb;
            padding: 15px;
            margin-bottom: 30px;
            border-radius: 4px;
        }
        .summary p {
            margin: 5px 0;
            font-size: 14px;
        }
        .category-section {
            margin-bottom: 40px;
        }
        .category-header {
            background-color: #f3f4f6;
            padding: 10px 15px;
            border-radius: 4px;
            margin-bottom: 20px;
        }
        .category-title {
            color: #374151;
            font-size: 20px;
            font-weight: 600;
            margin: 0;
            text-transform: capitalize;
        }
        .article {
            margin-bottom: 30px;
            padding-bottom: 25px;
            border-bottom: 1px solid #e5e7eb;
        }
        .article:last-child {
            border-bottom: none;
        }
        .article-header {
            margin-bottom: 10px;
        }
        .article-title {
            font-size: 18px;
            font-weight: 600;
            color: #1e40af;
            text-decoration: none;
            line-height: 1.4;
        }
        .article-title:hover {
            color: #2563eb;
            text-decoration: underline;
        }
        .article-meta {
            font-size: 13px;
            color: #6b7280;
            margin: 8px 0;
        }
        .article-meta span {
            margin-right: 15px;
        }
        .score-badge {
            display: inline-block;
            background-color: #dcfce7;
            color: #166534;
            padding: 3px 8px;
            border-radius: 12px;
            font-size: 12px;
            font-weight: 600;
        }
        .article-summary {
            color: #374151;
            font-size: 15px;
            line-height: 1.6;
            margin: 12px 0;
        }
        .read-more {
            display: inline-block;
            color: #2563eb;
            text-decoration: none;
            font-size: 14px;
            font-weight: 500;
            margin-top: 8px;
        }
        .read-more:hover {
            text-decoration: underline;
        }
        .footer {
            margin-top: 40px;
            padding-top: 20px;
            border-top: 2px solid #e5e7eb;
            text-align: center;
            color: #6b7280;
            font-size: 13px;
        }
        @media (max-width: 600px) {
            body {
                padding: 10px;
            }
            .container {
                padding: 20px;
            }
            h1 {
                font-size: 24px;
            }
            .article-title {
                font-size: 16px;
            }
        }
    </style>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1>{{ title }}</h1>
            <div class="date">{{ date }}</div>
        </div>

        <div class="summary">
            <p><strong>{{ total_articles }}</strong> articles curated from your personalized news sources</p>
            <p><strong>{{ total_sources }}</strong> sources | <strong>{{ total_categories }}</strong> categories</p>
        </div>

        {% for category, articles in articles_by_category.items() %}
        <div class="category-section">
            <div class="category-header">
                <h2 class="category-title">{{ category }}</h2>
            </div>

            {% for entry in articles %}
            <article class="article">
                <div class="article-header">
                    <a href="{{ entry.article.url }}" class="article-title" target="_blank" rel="noopener">
                        {{ entry.article.title }}
                    </a>
                </div>

                <div class="article-meta">
                    <span>{{ entry.article.source }}</span>
                    <span>{{ entry.article.published.strftime('%B %d, %Y at %H:%M') }}</span>
                    <span class="score-badge">Relevance: {{ "%.1f"|format(entry.relevance_score) }}/10</span>
                </div>

                <div class="article-summary">
                    {{ entry.ai_summary }}
                </div>

                <a href="{{ entry.article.url }}" class="read-more" target="_blank" rel="noopener">
                    Read full article →
                </a>
            </article>
            {% endfor %}
        </div>
        {% endfor %}

        <div class="footer">
            <p>Generated by News Agent | Powered by OpenRouter AI</p>
            <p>You received this because you subscribed to daily tech news digests</p>
        </div>
    </div>
</body>
</html>
54 src/logger.py Normal file
@@ -0,0 +1,54 @@
"""Logging configuration for News Agent"""

import logging
import sys
from logging.handlers import RotatingFileHandler
from pathlib import Path

from .config import get_config


def setup_logger(name: str = "news-agent") -> logging.Logger:
    """Set up and configure logger with file and console handlers"""
    config = get_config()
    log_config = config.logging

    # Create logger
    logger = logging.getLogger(name)
    logger.setLevel(getattr(logging, log_config.level.upper()))

    # Avoid duplicate handlers
    if logger.handlers:
        return logger

    # Create formatters
    detailed_formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
    )
    simple_formatter = logging.Formatter("%(levelname)s - %(message)s")

    # File handler with rotation
    log_file = Path(log_config.file)
    log_file.parent.mkdir(parents=True, exist_ok=True)

    file_handler = RotatingFileHandler(
        log_file, maxBytes=log_config.max_bytes, backupCount=log_config.backup_count
    )
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(detailed_formatter)

    # Console handler
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(simple_formatter)

    # Add handlers
    logger.addHandler(file_handler)
    logger.addHandler(console_handler)

    return logger


def get_logger(name: str = "news-agent") -> logging.Logger:
    """Get or create logger instance"""
    return logging.getLogger(name)
153 src/main.py Normal file
@@ -0,0 +1,153 @@
"""Main orchestrator for News Agent"""

import asyncio
from datetime import datetime

from .config import get_config
from .logger import setup_logger, get_logger
from .storage.database import Database
from .storage.models import DigestEntry
from .aggregator.rss_fetcher import RSSFetcher
from .ai.client import OpenRouterClient
from .ai.filter import ArticleFilter
from .ai.summarizer import ArticleSummarizer
from .email.generator import EmailGenerator
from .email.sender import EmailSender


async def main():
    """Main execution flow"""
    # Set up logging
    setup_logger()
    logger = get_logger()

    logger.info("=" * 60)
    logger.info("News Agent starting...")
    logger.info("=" * 60)

    try:
        # Load configuration
        config = get_config()

        # Initialize database
        db = Database(config.database.path)
        await db.initialize()

        # Clean up old articles
        await db.cleanup_old_articles(config.database.retention_days)

        # Initialize RSS fetcher
        fetcher = RSSFetcher()

        # Fetch articles from all sources
        logger.info(f"Fetching from {len(config.rss_sources)} RSS sources...")
        articles = await fetcher.fetch_all(config.rss_sources)

        if not articles:
            logger.warning("No articles fetched from any source")
            await fetcher.close()
            return

        # Save articles to database (deduplication)
        new_articles_count = await db.save_articles(articles)
        logger.info(
            f"Saved {new_articles_count} new articles (filtered {len(articles) - new_articles_count} duplicates)"
        )

        await fetcher.close()

        # Get unprocessed articles
        unprocessed = await db.get_unprocessed_articles()

        if not unprocessed:
            logger.info("No new articles to process")
            return

        logger.info(f"Processing {len(unprocessed)} new articles with AI...")

        # Initialize AI components
        ai_client = OpenRouterClient()
        filter_ai = ArticleFilter(ai_client)
        summarizer = ArticleSummarizer(ai_client)

        # Filter articles by relevance
        logger.info("Filtering articles by relevance...")
        filtered_articles = await filter_ai.filter_articles(
            unprocessed, max_articles=config.ai.filtering.max_articles
        )

        if not filtered_articles:
            logger.warning("No relevant articles found after filtering")
            # Mark all as processed but not included
            for article in unprocessed:
                await db.update_article_processing(
                    article.id, relevance_score=0.0, ai_summary="", included=False
                )
            return

        logger.info(f"Selected {len(filtered_articles)} relevant articles")

        # Summarize filtered articles
        logger.info("Generating AI summaries...")
        digest_entries = []

        for article, score in filtered_articles:
            summary = await summarizer.summarize(article)

            # Update database
            await db.update_article_processing(
                article.id, relevance_score=score, ai_summary=summary, included=True
            )

            # Create digest entry
            entry = DigestEntry(
                article=article,
                relevance_score=score,
                ai_summary=summary,
                category=article.category,
            )
            digest_entries.append(entry)

        # Mark remaining unprocessed articles as processed but not included
        processed_ids = {article.id for article, _ in filtered_articles}
        for article in unprocessed:
            if article.id not in processed_ids:
                await db.update_article_processing(
                    article.id, relevance_score=0.0, ai_summary="", included=False
                )

        # Generate email
        logger.info("Generating email digest...")
        generator = EmailGenerator()

        date_str = datetime.now().strftime("%A, %B %d, %Y")
        subject = config.email.subject_template.format(date=date_str)

        html_content, text_content = generator.generate_digest_email(
            digest_entries, date_str, subject
        )

        # Send email
        logger.info("Sending email...")
        sender = EmailSender()
        success = sender.send(subject, html_content, text_content)

        if success:
            logger.info("=" * 60)
            logger.info(f"Daily digest sent successfully with {len(digest_entries)} articles!")
            logger.info("=" * 60)
        else:
            logger.error("Failed to send email")

    except Exception as e:
        logger.error(f"Fatal error in main execution: {e}", exc_info=True)
        raise


def run():
    """Entry point for command-line execution"""
    asyncio.run(main())


if __name__ == "__main__":
    run()
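Presumably the entry point is invoked as a module from the repository root (the commit does not say); a hedged invocation sketch, assuming config.yaml and .env sit in the working directory.

# Sketch only: equivalent to `python -m src.main` from the repository root.
from src.main import run

run()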
1 src/storage/__init__.py Normal file
@@ -0,0 +1 @@
"""Database storage for articles"""
194 src/storage/database.py Normal file
@@ -0,0 +1,194 @@
"""SQLite database operations for article storage and deduplication"""

import aiosqlite
import json
from datetime import datetime, timedelta
from pathlib import Path
from typing import Optional

from .models import Article
from ..logger import get_logger

logger = get_logger()


class Database:
    """Async SQLite database manager"""

    def __init__(self, db_path: str | Path):
        self.db_path = Path(db_path)
        self.db_path.parent.mkdir(parents=True, exist_ok=True)

    async def initialize(self):
        """Create database tables if they don't exist"""
        async with aiosqlite.connect(self.db_path) as db:
            await db.execute(
                """
                CREATE TABLE IF NOT EXISTS articles (
                    id TEXT PRIMARY KEY,
                    url TEXT NOT NULL UNIQUE,
                    title TEXT NOT NULL,
                    summary TEXT,
                    content TEXT NOT NULL,
                    published TEXT NOT NULL,
                    source TEXT NOT NULL,
                    category TEXT NOT NULL,
                    fetched_at TEXT NOT NULL,
                    relevance_score REAL,
                    ai_summary TEXT,
                    processed INTEGER DEFAULT 0,
                    included_in_digest INTEGER DEFAULT 0
                )
                """
            )

            await db.execute(
                """
                CREATE INDEX IF NOT EXISTS idx_published
                ON articles(published)
                """
            )

            await db.execute(
                """
                CREATE INDEX IF NOT EXISTS idx_fetched_at
                ON articles(fetched_at)
                """
            )

            await db.commit()

        logger.info(f"Database initialized at {self.db_path}")

    async def article_exists(self, article_id: str) -> bool:
        """Check if article already exists in database"""
        async with aiosqlite.connect(self.db_path) as db:
            async with db.execute("SELECT 1 FROM articles WHERE id = ?", (article_id,)) as cursor:
                result = await cursor.fetchone()
                return result is not None

    async def save_article(self, article: Article) -> bool:
        """Save article to database. Returns True if saved, False if duplicate"""
        if await self.article_exists(article.id):
            logger.debug(f"Article already exists: {article.title}")
            return False

        async with aiosqlite.connect(self.db_path) as db:
            await db.execute(
                """
                INSERT INTO articles (
                    id, url, title, summary, content, published, source,
                    category, fetched_at, relevance_score, ai_summary,
                    processed, included_in_digest
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    article.id,
                    str(article.url),
                    article.title,
                    article.summary,
                    article.content,
                    article.published.isoformat(),
                    article.source,
                    article.category,
                    article.fetched_at.isoformat(),
                    article.relevance_score,
                    article.ai_summary,
                    int(article.processed),
                    int(article.included_in_digest),
                ),
            )
            await db.commit()

        logger.debug(f"Saved article: {article.title}")
        return True

    async def save_articles(self, articles: list[Article]) -> int:
        """Save multiple articles. Returns count of new articles saved"""
        count = 0
        for article in articles:
            if await self.save_article(article):
                count += 1
        return count

    async def get_unprocessed_articles(self, limit: Optional[int] = None) -> list[Article]:
        """Get articles that haven't been processed by AI yet"""
        query = """
            SELECT * FROM articles
            WHERE processed = 0
            ORDER BY published DESC
        """
        if limit:
            query += f" LIMIT {limit}"

        async with aiosqlite.connect(self.db_path) as db:
            db.row_factory = aiosqlite.Row
            async with db.execute(query) as cursor:
                rows = await cursor.fetchall()
                return [self._row_to_article(row) for row in rows]

    async def update_article_processing(
        self, article_id: str, relevance_score: float, ai_summary: str, included: bool
    ):
        """Update article with AI processing results"""
        async with aiosqlite.connect(self.db_path) as db:
            await db.execute(
                """
                UPDATE articles
                SET relevance_score = ?,
                    ai_summary = ?,
                    processed = 1,
                    included_in_digest = ?
                WHERE id = ?
                """,
                (relevance_score, ai_summary, int(included), article_id),
            )
            await db.commit()

    async def get_todays_digest_articles(self) -> list[Article]:
        """Get all articles included in today's digest"""
        today = datetime.now().date()
        async with aiosqlite.connect(self.db_path) as db:
            db.row_factory = aiosqlite.Row
            async with db.execute(
                """
                SELECT * FROM articles
                WHERE included_in_digest = 1
                AND date(fetched_at) = ?
                ORDER BY relevance_score DESC, published DESC
                """,
                (today.isoformat(),),
            ) as cursor:
                rows = await cursor.fetchall()
                return [self._row_to_article(row) for row in rows]

    async def cleanup_old_articles(self, retention_days: int):
        """Delete articles older than retention period"""
        cutoff_date = datetime.now() - timedelta(days=retention_days)
        async with aiosqlite.connect(self.db_path) as db:
            cursor = await db.execute(
                "DELETE FROM articles WHERE fetched_at < ?", (cutoff_date.isoformat(),)
            )
            deleted = cursor.rowcount
            await db.commit()

        if deleted > 0:
            logger.info(f"Cleaned up {deleted} old articles")

    def _row_to_article(self, row: aiosqlite.Row) -> Article:
        """Convert database row to Article model"""
        return Article(
            id=row["id"],
            url=row["url"],
            title=row["title"],
            summary=row["summary"],
            content=row["content"],
            published=datetime.fromisoformat(row["published"]),
            source=row["source"],
            category=row["category"],
            fetched_at=datetime.fromisoformat(row["fetched_at"]),
            relevance_score=row["relevance_score"],
            ai_summary=row["ai_summary"],
            processed=bool(row["processed"]),
            included_in_digest=bool(row["included_in_digest"]),
        )
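A small round trip through the Database class above (not part of this commit); the path, URL, and article fields are placeholders.

# Sketch only: initialize the schema, then show deduplication by article id.
import asyncio
from datetime import datetime, timezone
from hashlib import sha256

from src.storage.database import Database
from src.storage.models import Article


async def demo():
    db = Database("data/demo.db")
    await db.initialize()

    url = "https://example.com/post"  # placeholder URL
    article = Article(
        id=sha256(url.encode()).hexdigest(),
        url=url,
        title="Demo article",
        content="Body text",
        published=datetime.now(timezone.utc),
        source="Example Feed",
        category="general",
        fetched_at=datetime.now(timezone.utc),
    )

    print(await db.save_article(article))  # True on first insert
    print(await db.save_article(article))  # False: duplicate id is skipped


asyncio.run(demo())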
44 src/storage/models.py Normal file
@@ -0,0 +1,44 @@
"""Data models for articles and digests"""

from datetime import datetime
from typing import Optional

from pydantic import BaseModel, HttpUrl


class Article(BaseModel):
    """Article data model"""

    id: str  # Hash of URL
    url: HttpUrl
    title: str
    summary: Optional[str] = None
    content: str
    published: datetime
    source: str
    category: str
    fetched_at: datetime

    # AI processing fields
    relevance_score: Optional[float] = None
    ai_summary: Optional[str] = None
    processed: bool = False
    included_in_digest: bool = False


class DigestEntry(BaseModel):
    """Entry in daily digest"""

    article: Article
    relevance_score: float
    ai_summary: str
    category: str


class DailyDigest(BaseModel):
    """Complete daily digest"""

    date: str
    entries: list[DigestEntry]
    total_articles_processed: int
    total_articles_included: int
1 src/utils/__init__.py Normal file
@@ -0,0 +1 @@
"""Utility functions"""