commit e64465a7e686c3ecfc0285c4c2cfc19bdebe3587 Author: Rune Olsen Date: Mon Jan 26 12:34:00 2026 +0100 first commit diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..f6ec9ad --- /dev/null +++ b/.env.example @@ -0,0 +1,13 @@ +# OpenRouter API Configuration +OPENROUTER_API_KEY=your_api_key_here + +# Optional: Your site info for OpenRouter rankings +OPENROUTER_SITE_URL=https://your-site.com +OPENROUTER_SITE_NAME=YourSiteName + +# SMTP Credentials for your mail server +SMTP_USERNAME=your-email@yourdomain.com +SMTP_PASSWORD=your-smtp-password + +# Optional: Email notification for errors +ERROR_NOTIFICATION_EMAIL=your-email@example.com diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4d9d2e3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,49 @@ +# Environment variables +.env + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual environments +venv/ +ENV/ +env/ + +# Data and logs +data/*.db +data/logs/*.log + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# Testing +.pytest_cache/ +.coverage +htmlcov/ + +# OS +.DS_Store +Thumbs.db diff --git a/CHANGES.md b/CHANGES.md new file mode 100644 index 0000000..7bee0ce --- /dev/null +++ b/CHANGES.md @@ -0,0 +1,217 @@ +# Changes for External Mail Server Support + +This document summarizes the changes made to support external mail servers instead of local Postfix. + +## Files Modified + +### 1. `config.yaml` +- Updated SMTP section to use external mail server settings +- Changed default port from 25 to 587 (standard STARTTLS port) +- Enabled TLS by default +- Added comments explaining port and encryption options + +**Before:** +```yaml +smtp: + host: "localhost" + port: 25 + use_tls: false + use_ssl: false +``` + +**After:** +```yaml +smtp: + host: "mail.yourdomain.com" + port: 587 # Standard SMTP submission port (use 465 for SSL) + use_tls: true # Use STARTTLS (for port 587) + use_ssl: false # Set to true if using port 465 + # Username and password are loaded from .env file for security +``` + +### 2. `.env.example` +- Added SMTP_USERNAME field +- Added SMTP_PASSWORD field +- Added documentation for SMTP credentials + +**Added:** +```env +# SMTP Credentials for your mail server +SMTP_USERNAME=your-email@yourdomain.com +SMTP_PASSWORD=your-smtp-password +``` + +### 3. `src/config.py` +- Added `smtp_username` field to `EnvSettings` class +- Added `smtp_password` field to `EnvSettings` class +- Modified `Config.email` property to merge SMTP credentials from environment variables into email configuration + +**Changes:** +- Environment variables now include SMTP credentials +- SMTP credentials are automatically loaded from `.env` and merged into config +- Keeps passwords secure (not in config.yaml) + +### 4. `README.md` +- Removed Postfix installation instructions +- Updated prerequisites to mention "SMTP Server" instead of Postfix +- Added SMTP configuration examples for various providers +- Updated configuration steps to include SMTP credentials +- Renumbered installation steps (removed Postfix step) + +### 5. `SETUP.md` +- Removed Postfix installation and configuration sections +- Updated system requirements (no Postfix needed) +- Added SMTP credentials to `.env` configuration +- Updated email configuration section with SMTP examples +- Updated verification checklist +- Updated troubleshooting section for SMTP issues + +## New Files Added + +### 6. 
`SMTP_CONFIG.md` +Comprehensive guide for configuring SMTP with: +- Step-by-step setup instructions +- Common mail server configurations (Gmail, Outlook, SendGrid, etc.) +- Port and encryption guide (587 vs 465 vs 25) +- Testing procedures +- Troubleshooting common SMTP errors +- Security best practices +- Example working configurations + +## How It Works + +### Configuration Flow + +1. **User creates `.env` file** with SMTP credentials: + ```env + SMTP_USERNAME=user@domain.com + SMTP_PASSWORD=password + ``` + +2. **User edits `config.yaml`** with mail server settings: + ```yaml + smtp: + host: "mail.domain.com" + port: 587 + use_tls: true + ``` + +3. **Application loads configuration**: + - `EnvSettings` loads SMTP credentials from `.env` + - `Config` loads mail server settings from `config.yaml` + - `Config.email` property merges them together + +4. **Email sender uses merged configuration**: + - `EmailSender` receives complete SMTP config + - Connects to external mail server + - Authenticates with username/password + - Sends email via authenticated SMTP + +### Security Benefits + +- **Credentials separated from code**: SMTP passwords in `.env` (gitignored) +- **Mail server settings in config**: Non-sensitive info in `config.yaml` +- **No local mail server needed**: Simpler setup, fewer services to maintain +- **Industry standard**: Works with any SMTP provider + +## What Users Need to Do + +### Required Changes + +1. **Update `.env` file**: + ```bash + cp .env.example .env + nano .env + ``` + Add: + ```env + SMTP_USERNAME=your-email@domain.com + SMTP_PASSWORD=your-password + ``` + +2. **Update `config.yaml`**: + ```bash + nano config.yaml + ``` + Change email section: + ```yaml + email: + to: "recipient@example.com" + from: "sender@domain.com" + smtp: + host: "mail.domain.com" + port: 587 + use_tls: true + use_ssl: false + ``` + +### No Longer Needed + +- Postfix installation +- Postfix configuration +- Local mail server maintenance +- Mail relay setup + +## Testing + +To test the new configuration: + +```bash +# 1. Set up credentials +cp .env.example .env +nano .env # Add SMTP credentials + +# 2. Configure mail server +nano config.yaml # Update SMTP settings + +# 3. Test run +source .venv/bin/activate +python -m src.main + +# 4. Check logs +tail -f data/logs/news-agent.log +``` + +## Backward Compatibility + +The system still supports local mail servers if needed: + +**For local Postfix:** +```yaml +smtp: + host: "localhost" + port: 25 + use_tls: false + use_ssl: false +``` + +No credentials needed in `.env` for localhost delivery. + +## Common Use Cases + +### 1. Personal Mail Server +User runs their own mail server at `mail.example.com`: +- Authenticate with their email credentials +- Use standard port 587 with TLS +- Most common use case + +### 2. Gmail for Testing +User wants to test with Gmail: +- Create App Password in Google account +- Use `smtp.gmail.com:587` +- Quick setup for development/testing + +### 3. Commercial SMTP Service +User has SendGrid/Mailgun subscription: +- Use service's SMTP credentials +- Higher reliability and deliverability +- Good for production use + +## Documentation Structure + +Users should read in this order: + +1. **README.md** - Overview and quick start +2. **SETUP.md** - Step-by-step installation +3. **SMTP_CONFIG.md** - Detailed SMTP configuration (if issues) +4. 
**CHANGES.md** - This file (if migrating from old version) diff --git a/README.md b/README.md new file mode 100644 index 0000000..508ab2c --- /dev/null +++ b/README.md @@ -0,0 +1,360 @@ +# News Agent + +An AI-powered daily tech news aggregator that fetches articles from RSS feeds, filters them by relevance to your interests, generates AI summaries, and emails you a beautifully formatted digest every morning. + +## Features + +- **RSS Aggregation**: Fetches from 15+ tech news sources covering Development, Self-hosting, Enterprise Architecture, and Gadgets +- **AI Filtering**: Uses OpenRouter AI to score articles based on your interests (0-10 scale) +- **Smart Summarization**: Generates concise 2-3 sentence summaries of each relevant article +- **Beautiful Emails**: HTML email with responsive design, categorized sections, and relevance scores +- **Deduplication**: SQLite database prevents duplicate articles +- **Automated Scheduling**: Runs daily at 07:00 Europe/Oslo time via systemd timer +- **Production Ready**: Error handling, logging, resource limits, and monitoring + +## Architecture + +``` +news-agent/ +├── src/ +│ ├── aggregator/ # RSS feed fetching +│ ├── ai/ # OpenRouter client, filtering, summarization +│ ├── storage/ # SQLite database operations +│ ├── email/ # Email generation and sending +│ └── main.py # Main orchestrator +├── config.yaml # Configuration +├── .env # Secrets (API keys) +└── systemd/ # Service and timer files +``` + +## Prerequisites + +- **Fedora Linux** (or other systemd-based distribution) +- **Python 3.11+** +- **SMTP Server** (your own mail server or service like Gmail, Outlook, etc.) +- **OpenRouter API Key** (get from https://openrouter.ai) + +## Installation + +### 1. Clone/Copy Project + +```bash +# Copy this project to your home directory +mkdir -p ~/news-agent +cd ~/news-agent +``` + +### 2. Install Python and Dependencies + +```bash +# Install Python 3.11+ if not already installed +sudo dnf install python3.11 python3-pip + +# Create virtual environment +python3.11 -m venv .venv +source .venv/bin/activate + +# Install dependencies +pip install -e . +``` + +### 3. Configure News Agent + +```bash +# Copy environment template +cp .env.example .env + +# Edit .env and add your credentials +nano .env +``` + +**Required in `.env`:** +```bash +# OpenRouter API Key +OPENROUTER_API_KEY=sk-or-v1-...your-key-here... + +# SMTP Credentials for your mail server +SMTP_USERNAME=your-email@yourdomain.com +SMTP_PASSWORD=your-smtp-password +``` + +**Edit `config.yaml`:** +```bash +nano config.yaml +``` + +Update the email section: +```yaml +email: + to: "your-email@example.com" # Where to receive the digest + from: "news-agent@yourdomain.com" # Sender address + smtp: + host: "mail.yourdomain.com" # Your mail server hostname + port: 587 # 587 for TLS, 465 for SSL + use_tls: true # true for port 587 + use_ssl: false # true for port 465 +``` + +**Common SMTP Settings:** +- **Your own server**: Use your mail server hostname and credentials +- **Gmail**: `smtp.gmail.com:587`, use App Password +- **Outlook/Office365**: `smtp.office365.com:587` +- **SendGrid**: `smtp.sendgrid.net:587`, use API key as password + +Optionally adjust: +- AI model (default: `google/gemini-flash-1.5` - fast and cheap) +- Filtering threshold (default: 6.5/10) +- Max articles per digest (default: 15) +- RSS sources (add/remove feeds) +- Your interests for AI filtering + +### 4. 
Test Run + +```bash +# Activate virtual environment +source .venv/bin/activate + +# Run manually to test +python -m src.main +``` + +Check: +- Console output for progress +- Logs in `data/logs/news-agent.log` +- Your email inbox for the digest + +### 5. Set Up Systemd Timer + +```bash +# Copy systemd files to user systemd directory +mkdir -p ~/.config/systemd/user +cp systemd/news-agent.service ~/.config/systemd/user/ +cp systemd/news-agent.timer ~/.config/systemd/user/ + +# Edit service file to update paths if needed +nano ~/.config/systemd/user/news-agent.service + +# Reload systemd +systemctl --user daemon-reload + +# Enable and start timer +systemctl --user enable news-agent.timer +systemctl --user start news-agent.timer + +# Check timer status +systemctl --user list-timers +systemctl --user status news-agent.timer +``` + +**Enable lingering** (allows user services to run when not logged in): +```bash +sudo loginctl enable-linger $USER +``` + +## Usage + +### Manual Run + +```bash +cd ~/news-agent +source .venv/bin/activate +python -m src.main +``` + +### Check Status + +```bash +# Check timer status +systemctl --user status news-agent.timer + +# View logs +journalctl --user -u news-agent.service -f + +# Or check log file +tail -f data/logs/news-agent.log +``` + +### Trigger Manually + +```bash +# Run service immediately (without waiting for timer) +systemctl --user start news-agent.service +``` + +### View Last Run + +```bash +systemctl --user status news-agent.service +``` + +## Configuration + +### RSS Sources + +Add or remove sources in `config.yaml`: + +```yaml +sources: + rss: + - name: "Your Source" + url: "https://example.com/feed.xml" + category: "tech" # tech, development, selfhosting, architecture, gadgets +``` + +### AI Configuration + +**Models** (from cheap to expensive): +- `google/gemini-flash-1.5` - Fast, cheap, good quality (recommended) +- `meta-llama/llama-3.1-8b-instruct` - Very cheap +- `anthropic/claude-3.5-haiku` - Better quality, slightly more expensive +- `openai/gpt-4o-mini` - Good quality, moderate price + +**Filtering:** +```yaml +ai: + filtering: + enabled: true + min_score: 6.5 # Articles below this score are filtered out + max_articles: 15 # Maximum articles in daily digest +``` + +**Interests:** +```yaml +ai: + interests: + - "Your interest here" + - "Another topic" +``` + +### Schedule + +Change time in `~/.config/systemd/user/news-agent.timer`: + +```ini +[Timer] +OnCalendar=07:00 # 24-hour format +``` + +Then reload: +```bash +systemctl --user daemon-reload +systemctl --user restart news-agent.timer +``` + +## Troubleshooting + +### No Email Received + +1. **Check logs:** + ```bash + journalctl --user -u news-agent.service -n 50 + ``` + +2. **Check Postfix:** + ```bash + sudo systemctl status postfix + sudo tail -f /var/log/maillog + ``` + +3. **Test email manually:** + ```bash + echo "Test email" | mail -s "Test" your-email@example.com + ``` + +### API Errors + +1. **Verify API key in `.env`** +2. **Check OpenRouter credit balance:** https://openrouter.ai/credits +3. 
**Check rate limits in logs** + +### Service Not Running + +```bash +# Check service status +systemctl --user status news-agent.service + +# Check timer status +systemctl --user status news-agent.timer + +# View detailed logs +journalctl --user -xe -u news-agent.service +``` + +### Database Issues + +```bash +# Reset database (WARNING: deletes all history) +rm data/articles.db +python -m src.main +``` + +## Cost Estimation + +Using `google/gemini-flash-1.5` (recommended): + +- **Daily:** ~$0.05-0.15 (varies by article count) +- **Monthly:** ~$1.50-4.50 +- **Yearly:** ~$18-54 + +Factors affecting cost: +- Number of new articles +- Content length +- Filtering threshold (lower = more articles = higher cost) + +## Maintenance + +### Update Dependencies + +```bash +cd ~/news-agent +source .venv/bin/activate +pip install --upgrade -e . +``` + +### View Statistics + +```bash +# Check database +sqlite3 data/articles.db "SELECT COUNT(*) FROM articles;" +sqlite3 data/articles.db "SELECT category, COUNT(*) FROM articles GROUP BY category;" +``` + +### Logs Rotation + +Logs automatically rotate at 10MB with 5 backups (configured in `config.yaml`). + +## Advanced Features + +### Add API News Sources + +Extend `src/aggregator/api_fetcher.py` to support NewsAPI, Google News API, etc. + +### Customize Email Template + +Edit `src/email/templates/daily_digest.html` for different styling. + +### Web Dashboard + +Add Flask/FastAPI to create a web interface for viewing past digests. + +## Contributing + +This is a personal project template. Feel free to fork and customize to your needs. + +## License + +MIT License - Free to use and modify + +## Support + +For issues with: +- **OpenRouter:** https://openrouter.ai/docs +- **Postfix:** Fedora documentation +- **This code:** Check logs and configuration + +## Credits + +Built with: +- Python 3.11+ +- OpenRouter AI (https://openrouter.ai) +- Feedparser, Jinja2, Pydantic, and other open-source libraries diff --git a/SETUP.md b/SETUP.md new file mode 100644 index 0000000..8553d65 --- /dev/null +++ b/SETUP.md @@ -0,0 +1,273 @@ +# Quick Setup Guide + +Follow these steps to get News Agent running on your Fedora machine. + +## 1. System Requirements + +```bash +# Update system +sudo dnf update -y + +# Install Python 3.11+ +sudo dnf install python3.11 python3-pip -y +``` + +## 2. Install News Agent + +```bash +# Navigate to project directory +cd ~/news-agent + +# Create virtual environment +python3.11 -m venv .venv + +# Activate it +source .venv/bin/activate + +# Install dependencies +pip install feedparser httpx openai pydantic pydantic-settings jinja2 premailer python-dotenv pyyaml aiosqlite +``` + +## 3. 
Configuration + +### Create `.env` file: +```bash +cp .env.example .env +nano .env +``` + +Add your credentials: +```env +# OpenRouter API Key (required) +OPENROUTER_API_KEY=sk-or-v1-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + +# SMTP Credentials (required) +SMTP_USERNAME=your-email@yourdomain.com +SMTP_PASSWORD=your-smtp-password + +# Optional +OPENROUTER_SITE_URL=https://yoursite.com +OPENROUTER_SITE_NAME=YourSiteName +``` + +Get OpenRouter API key from: https://openrouter.ai/keys + +### Update `config.yaml`: +```bash +nano config.yaml +``` + +Update email settings: +```yaml +email: + to: "your-email@example.com" # Where to receive digest + from: "news-agent@yourdomain.com" # Sender address + smtp: + host: "mail.yourdomain.com" # Your mail server + port: 587 # 587 for STARTTLS, 465 for SSL + use_tls: true # true for port 587 + use_ssl: false # true for port 465 +``` + +**Common mail servers:** +- **Your own**: Use your mail server hostname and SMTP credentials +- **Gmail**: `smtp.gmail.com:587` (requires App Password) +- **Outlook**: `smtp.office365.com:587` + +## 4. Test Run + +```bash +# Make sure virtual environment is activated +source .venv/bin/activate + +# Run the agent +python -m src.main +``` + +**Expected output:** +``` +INFO - News Agent starting... +INFO - Database initialized +INFO - Fetching from 15 RSS sources... +INFO - Fetched 127 articles from all sources +INFO - Saved 127 new articles +INFO - Processing 127 new articles with AI... +INFO - Filtering articles by relevance... +INFO - Selected 15 relevant articles +INFO - Generating AI summaries... +INFO - Generating email digest... +INFO - Sending email... +INFO - Daily digest sent successfully with 15 articles! +``` + +**Check logs:** +```bash +cat data/logs/news-agent.log +``` + +## 5. Set Up Systemd Timer + +```bash +# Create user systemd directory +mkdir -p ~/.config/systemd/user + +# Update service file with correct paths +sed "s|/home/%u/news-agent|$HOME/news-agent|g" systemd/news-agent.service > ~/.config/systemd/user/news-agent.service + +# Copy timer file +cp systemd/news-agent.timer ~/.config/systemd/user/ + +# Reload systemd +systemctl --user daemon-reload + +# Enable timer +systemctl --user enable news-agent.timer +systemctl --user start news-agent.timer + +# Enable lingering (allows service to run when not logged in) +sudo loginctl enable-linger $USER +``` + +**Verify setup:** +```bash +# Check timer is active +systemctl --user list-timers + +# Should show: +# NEXT LEFT LAST PASSED UNIT ACTIVATES +# Tue 2024-01-27 07:00:00 CET 12h left - - news-agent.timer news-agent.service +``` + +## 6. Monitoring + +### View logs in real-time: +```bash +journalctl --user -u news-agent.service -f +``` + +### Check last run: +```bash +systemctl --user status news-agent.service +``` + +### Manual trigger: +```bash +systemctl --user start news-agent.service +``` + +### View timer schedule: +```bash +systemctl --user list-timers news-agent.timer +``` + +## 7. 
Verification Checklist + +- [ ] Python 3.11+ installed +- [ ] Virtual environment created and activated +- [ ] All dependencies installed +- [ ] `.env` file created with OpenRouter API key and SMTP credentials +- [ ] `config.yaml` updated with your mail server settings +- [ ] Test run completed successfully +- [ ] Email received in inbox +- [ ] Systemd timer enabled and scheduled +- [ ] Lingering enabled for user + +## Common Issues + +### "No module named 'feedparser'" +```bash +source .venv/bin/activate +pip install feedparser httpx openai pydantic pydantic-settings jinja2 premailer python-dotenv pyyaml aiosqlite +``` + +### "Failed to send email" +Check your SMTP credentials and server settings: +```bash +# Verify .env has SMTP credentials +cat .env | grep SMTP + +# Verify config.yaml has correct mail server +grep -A 5 "smtp:" config.yaml + +# Check logs for specific error +tail -n 50 data/logs/news-agent.log +``` + +Common issues: +- Wrong port (587 for TLS, 465 for SSL) +- Missing TLS/SSL setting +- Invalid credentials +- Firewall blocking SMTP port + +### "API key not found" +```bash +# Verify .env file exists and has correct format +cat .env + +# Should show: +# OPENROUTER_API_KEY=sk-or-v1-... +``` + +### Timer not running +```bash +# Check timer status +systemctl --user status news-agent.timer + +# If inactive: +systemctl --user enable news-agent.timer +systemctl --user start news-agent.timer + +# Enable lingering +sudo loginctl enable-linger $USER +``` + +## Customization + +### Change schedule time +Edit `~/.config/systemd/user/news-agent.timer`: +```ini +[Timer] +OnCalendar=08:30 # Run at 8:30 AM instead +``` + +Then reload: +```bash +systemctl --user daemon-reload +systemctl --user restart news-agent.timer +``` + +### Add/remove RSS feeds +Edit `config.yaml` under `sources.rss` section. + +### Adjust AI filtering +In `config.yaml`: +```yaml +ai: + filtering: + min_score: 7.0 # Stricter filtering (default: 6.5) + max_articles: 10 # Fewer articles (default: 15) +``` + +### Change AI model +In `config.yaml`: +```yaml +ai: + model: "anthropic/claude-3.5-haiku" # Better quality + # or + model: "meta-llama/llama-3.1-8b-instruct" # Cheaper +``` + +## Next Steps + +1. **Wait for first automated run** (tomorrow at 07:00) +2. **Monitor costs** at https://openrouter.ai/activity +3. **Adjust filtering threshold** based on article quality +4. **Add custom RSS feeds** for your specific interests +5. **Set up email forwarding** if not using local mailbox + +## Support + +- **Logs:** `data/logs/news-agent.log` +- **Database:** `data/articles.db` (SQLite) +- **Service status:** `systemctl --user status news-agent.service` +- **OpenRouter Dashboard:** https://openrouter.ai/activity diff --git a/SMTP_CONFIG.md b/SMTP_CONFIG.md new file mode 100644 index 0000000..b3aa195 --- /dev/null +++ b/SMTP_CONFIG.md @@ -0,0 +1,321 @@ +# SMTP Configuration Guide + +This guide helps you configure News Agent to work with your mail server. + +## Configuration Overview + +News Agent needs two pieces of configuration: + +1. **SMTP credentials** in `.env` file (secure) +2. **SMTP server settings** in `config.yaml` (non-sensitive) + +## Step-by-Step Setup + +### 1. Edit `.env` file + +```bash +nano .env +``` + +Add your SMTP credentials: +```env +SMTP_USERNAME=your-email@yourdomain.com +SMTP_PASSWORD=your-password-or-app-password +``` + +**Security Note:** The `.env` file is gitignored and should never be committed to version control. + +### 2. 
Edit `config.yaml` + +```bash +nano config.yaml +``` + +Update the SMTP section under `email`: +```yaml +email: + to: "recipient@example.com" # Where to send the digest + from: "sender@yourdomain.com" # From address (usually same as SMTP_USERNAME) + from_name: "Daily Tech News Agent" + subject_template: "Tech News Digest - {date}" + smtp: + host: "mail.yourdomain.com" # Your SMTP server hostname + port: 587 # See port guide below + use_tls: true # See TLS/SSL guide below + use_ssl: false +``` + +## Common Mail Server Configurations + +### Your Own Mail Server + +If you run your own mail server (Postfix, Exim, etc.): + +```yaml +smtp: + host: "mail.yourdomain.com" + port: 587 # Standard submission port + use_tls: true + use_ssl: false +``` + +```env +SMTP_USERNAME=your-email@yourdomain.com +SMTP_PASSWORD=your-actual-password +``` + +### Gmail + +**Important:** Gmail requires an App Password, not your regular password. + +Generate App Password: +1. Go to https://myaccount.google.com/security +2. Enable 2-factor authentication +3. Go to App Passwords +4. Generate password for "Mail" + +```yaml +smtp: + host: "smtp.gmail.com" + port: 587 + use_tls: true + use_ssl: false +``` + +```env +SMTP_USERNAME=your-email@gmail.com +SMTP_PASSWORD=your-16-char-app-password +``` + +### Outlook / Office 365 + +```yaml +smtp: + host: "smtp.office365.com" + port: 587 + use_tls: true + use_ssl: false +``` + +```env +SMTP_USERNAME=your-email@outlook.com +SMTP_PASSWORD=your-outlook-password +``` + +### SendGrid + +```yaml +smtp: + host: "smtp.sendgrid.net" + port: 587 + use_tls: true + use_ssl: false +``` + +```env +SMTP_USERNAME=apikey +SMTP_PASSWORD=your-sendgrid-api-key +``` + +### Mailgun + +```yaml +smtp: + host: "smtp.mailgun.org" + port: 587 + use_tls: true + use_ssl: false +``` + +```env +SMTP_USERNAME=postmaster@your-domain.mailgun.org +SMTP_PASSWORD=your-mailgun-smtp-password +``` + +## Port and Encryption Guide + +### Port 587 (Recommended) +- **Protocol:** STARTTLS +- **Settings:** `port: 587`, `use_tls: true`, `use_ssl: false` +- **Use case:** Most modern SMTP servers +- **Security:** Connection starts unencrypted, then upgrades to TLS + +### Port 465 +- **Protocol:** SMTPS (SMTP over SSL) +- **Settings:** `port: 465`, `use_tls: false`, `use_ssl: true` +- **Use case:** Legacy SSL connections +- **Security:** Encrypted from the start + +### Port 25 +- **Protocol:** Plain SMTP +- **Settings:** `port: 25`, `use_tls: false`, `use_ssl: false` +- **Use case:** Local mail servers only (not recommended for internet) +- **Security:** Unencrypted (only use on localhost) + +## Testing Your Configuration + +### Test 1: Manual Run + +```bash +cd ~/news-agent +source .venv/bin/activate +python -m src.main +``` + +Check the output for email sending status. + +### Test 2: Check Logs + +```bash +tail -n 50 data/logs/news-agent.log +``` + +Look for: +- `INFO - Email sent successfully` (success) +- `ERROR - SMTP error sending email` (failure with details) + +### Test 3: Verify Credentials + +```bash +# Check .env file has credentials +cat .env | grep SMTP + +# Should show: +# SMTP_USERNAME=your-email@domain.com +# SMTP_PASSWORD=your-password +``` + +## Troubleshooting + +### Error: "Authentication failed" + +**Cause:** Wrong username or password + +**Solutions:** +1. Verify SMTP_USERNAME matches your email exactly +2. For Gmail: Use App Password, not regular password +3. Check for typos in password +4. 
Ensure no extra spaces in .env file + +### Error: "Connection refused" + +**Cause:** Wrong host or port, or firewall blocking + +**Solutions:** +1. Verify mail server hostname is correct +2. Check if port 587 or 465 is open: + ```bash + telnet mail.yourdomain.com 587 + ``` +3. Check firewall rules: + ```bash + sudo firewall-cmd --list-all + ``` +4. Try alternative port (465 instead of 587) + +### Error: "Certificate verification failed" + +**Cause:** SSL/TLS certificate issues + +**Solutions:** +1. Ensure your mail server has valid SSL certificate +2. If using self-signed certificate, you may need to adjust Python SSL settings (not recommended for security) + +### Error: "Sender address rejected" + +**Cause:** The "from" address doesn't match authenticated user + +**Solutions:** +1. Ensure `email.from` in config.yaml matches `SMTP_USERNAME` +2. Some servers require exact match between sender and authenticated user + +### Error: "Timeout" + +**Cause:** Network issues or slow mail server + +**Solutions:** +1. Check internet connectivity +2. Try a different network +3. Verify mail server is responsive + +## Security Best Practices + +1. **Never commit `.env` file** - It's gitignored by default +2. **Use App Passwords** - For Gmail and similar services +3. **Use TLS/SSL** - Always encrypt the connection (port 587 or 465) +4. **Restrict file permissions**: + ```bash + chmod 600 .env + ``` +5. **Rotate passwords regularly** - Change SMTP password periodically + +## Advanced: Testing SMTP Manually + +You can test SMTP connection with OpenSSL: + +```bash +# Test STARTTLS (port 587) +openssl s_client -starttls smtp -connect mail.yourdomain.com:587 + +# Test SSL (port 465) +openssl s_client -connect mail.yourdomain.com:465 + +# If connection succeeds, you'll see certificate info and can test SMTP commands: +EHLO localhost +AUTH LOGIN +# (enter base64 encoded username and password) +``` + +## Getting Help + +If you're still having issues: + +1. **Check mail server logs** (if you control the server) +2. **Contact your mail provider** - They can verify SMTP settings +3. **Review News Agent logs** - Often contains specific error messages: + ```bash + cat data/logs/news-agent.log + ``` +4. 
**Test with another SMTP tool** - Verify credentials work outside News Agent + +## Example Working Configurations + +### Personal Mail Server (Most Common) + +**.env:** +```env +SMTP_USERNAME=myemail@mydomain.com +SMTP_PASSWORD=MySecurePassword123 +``` + +**config.yaml:** +```yaml +email: + to: "myemail@mydomain.com" + from: "news-agent@mydomain.com" + smtp: + host: "mail.mydomain.com" + port: 587 + use_tls: true + use_ssl: false +``` + +### Gmail with App Password + +**.env:** +```env +SMTP_USERNAME=myemail@gmail.com +SMTP_PASSWORD=abcdabcdabcdabcd +``` + +**config.yaml:** +```yaml +email: + to: "myemail@gmail.com" + from: "myemail@gmail.com" + smtp: + host: "smtp.gmail.com" + port: 587 + use_tls: true + use_ssl: false +``` diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000..0fcc02a --- /dev/null +++ b/config.yaml @@ -0,0 +1,117 @@ +schedule: + time: "07:00" + timezone: "Europe/Oslo" + +sources: + rss: + # General Tech + - name: "Hacker News" + url: "https://news.ycombinator.com/rss" + category: "tech" + + - name: "Ars Technica" + url: "https://feeds.arstechnica.com/arstechnica/index" + category: "tech" + + - name: "TechCrunch" + url: "https://techcrunch.com/feed/" + category: "tech" + + # Development + - name: "Dev.to" + url: "https://dev.to/feed" + category: "development" + + - name: "GitHub Trending" + url: "https://mshibanami.github.io/GitHubTrendingRSS/daily/all.xml" + category: "development" + + - name: "InfoQ" + url: "https://feed.infoq.com/" + category: "development" + + - name: "r/programming" + url: "https://www.reddit.com/r/programming/.rss" + category: "development" + + # Self-hosting + - name: "r/selfhosted" + url: "https://www.reddit.com/r/selfhosted/.rss" + category: "selfhosting" + + - name: "r/homelab" + url: "https://www.reddit.com/r/homelab/.rss" + category: "selfhosting" + + # Enterprise Architecture + - name: "Martin Fowler" + url: "https://martinfowler.com/feed.atom" + category: "architecture" + + - name: "InfoQ Architecture" + url: "https://feed.infoq.com/architecture-design/" + category: "architecture" + + - name: "The New Stack" + url: "https://thenewstack.io/feed/" + category: "architecture" + + # Gadgets + - name: "Engadget" + url: "https://www.engadget.com/rss.xml" + category: "gadgets" + + - name: "AnandTech" + url: "https://www.anandtech.com/rss/" + category: "gadgets" + + - name: "Tom's Hardware" + url: "https://www.tomshardware.com/feeds/all" + category: "gadgets" + +ai: + provider: "openrouter" + base_url: "https://openrouter.ai/api/v1" + model: "google/gemini-flash-1.5" + # Alternative models: + # - "anthropic/claude-3.5-haiku" (better quality, slightly more expensive) + # - "meta-llama/llama-3.1-8b-instruct" (very cheap) + + filtering: + enabled: true + min_score: 6.5 # Out of 10 - articles below this score are filtered out + max_articles: 15 # Maximum articles to include in daily digest + + interests: + - "AI and machine learning developments" + - "Open source projects and tools" + - "Self-hosting solutions and home lab setups" + - "Enterprise architecture patterns and best practices" + - "Python ecosystem and development tools" + - "Linux and system administration" + - "New gadgets and hardware reviews" + - "Cloud-native technologies and Kubernetes" + - "DevOps and infrastructure as code" + - "Security and privacy tools" + +email: + to: "your-email@example.com" + from: "news-agent@yourdomain.com" + from_name: "Daily Tech News Agent" + subject_template: "Tech News Digest - {date}" + smtp: + host: "mail.yourdomain.com" + port: 587 
# Standard SMTP submission port (use 465 for SSL) + use_tls: true # Use STARTTLS (for port 587) + use_ssl: false # Set to true if using port 465 + # Username and password are loaded from .env file for security + +database: + path: "data/articles.db" + retention_days: 30 # How long to keep article history + +logging: + level: "INFO" + file: "data/logs/news-agent.log" + max_bytes: 10485760 # 10MB + backup_count: 5 diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..f89227e --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,35 @@ +[project] +name = "news-agent" +version = "0.1.0" +description = "AI-powered daily tech news aggregator and email digest" +requires-python = ">=3.11" +dependencies = [ + "feedparser>=6.0.11", + "httpx>=0.27.0", + "openai>=1.12.0", + "pydantic>=2.6.0", + "pydantic-settings>=2.1.0", + "jinja2>=3.1.3", + "premailer>=3.10.0", + "python-dotenv>=1.0.1", + "pyyaml>=6.0.1", + "aiosqlite>=0.19.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=8.0.0", + "pytest-asyncio>=0.23.5", + "ruff>=0.2.1", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.ruff] +line-length = 100 +target-version = "py311" + +[tool.ruff.lint] +select = ["E", "F", "I", "N", "W", "UP"] diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..3e964cc --- /dev/null +++ b/src/__init__.py @@ -0,0 +1,3 @@ +"""News Agent - AI-powered daily tech news aggregator""" + +__version__ = "0.1.0" diff --git a/src/aggregator/__init__.py b/src/aggregator/__init__.py new file mode 100644 index 0000000..71f66cd --- /dev/null +++ b/src/aggregator/__init__.py @@ -0,0 +1 @@ +"""News aggregation from various sources""" diff --git a/src/aggregator/rss_fetcher.py b/src/aggregator/rss_fetcher.py new file mode 100644 index 0000000..c3fb529 --- /dev/null +++ b/src/aggregator/rss_fetcher.py @@ -0,0 +1,162 @@ +"""RSS feed fetching and parsing""" + +import feedparser +import httpx +from datetime import datetime, timedelta, timezone +from hashlib import sha256 +from typing import Optional +from email.utils import parsedate_to_datetime + +from ..config import RSSSource +from ..storage.models import Article +from ..logger import get_logger + +logger = get_logger() + + +class RSSFetcher: + """Fetch and parse RSS feeds""" + + def __init__(self, timeout: int = 30, hours_lookback: int = 24): + """ + Initialize RSS fetcher + + Args: + timeout: HTTP request timeout in seconds + hours_lookback: How many hours back to fetch articles + """ + self.timeout = timeout + self.hours_lookback = hours_lookback + self.client = httpx.AsyncClient( + timeout=timeout, + follow_redirects=True, + headers={"User-Agent": "NewsAgent/1.0 RSS Reader"}, + ) + + async def close(self): + """Close HTTP client""" + await self.client.aclose() + + async def fetch(self, source: RSSSource) -> list[Article]: + """ + Fetch and parse articles from RSS feed + + Args: + source: RSS source configuration + + Returns: + List of Article objects from the feed + """ + try: + logger.info(f"Fetching RSS feed: {source.name}") + response = await self.client.get(str(source.url)) + response.raise_for_status() + + # Parse feed + feed = feedparser.parse(response.text) + + if feed.bozo: + logger.warning(f"Feed parsing warning for {source.name}: {feed.bozo_exception}") + + articles = [] + cutoff_time = datetime.now(timezone.utc) - timedelta(hours=self.hours_lookback) + + for entry in feed.entries: + try: + article = self._parse_entry(entry, source) + if article and article.published >= 
cutoff_time: + articles.append(article) + except Exception as e: + logger.warning(f"Failed to parse entry from {source.name}: {e}") + continue + + logger.info(f"Fetched {len(articles)} articles from {source.name}") + return articles + + except httpx.HTTPError as e: + logger.error(f"HTTP error fetching {source.name}: {e}") + return [] + except Exception as e: + logger.error(f"Unexpected error fetching {source.name}: {e}") + return [] + + def _parse_entry( + self, entry: feedparser.FeedParserDict, source: RSSSource + ) -> Optional[Article]: + """Parse a single RSS entry into an Article""" + # Get URL + url = entry.get("link") + if not url: + return None + + # Generate unique ID from URL + article_id = sha256(url.encode()).hexdigest() + + # Get title + title = entry.get("title", "Untitled") + + # Get published date + published = self._parse_date(entry) + if not published: + published = datetime.now(timezone.utc) + + # Get summary/content + summary = entry.get("summary", None) + content = entry.get("content", [{}])[0].get("value", entry.get("description", title)) + + # Remove HTML tags from content if needed (basic cleanup) + if content == title: + content = summary or title + + return Article( + id=article_id, + url=url, + title=title, + summary=summary, + content=content, + published=published, + source=source.name, + category=source.category, + fetched_at=datetime.now(timezone.utc), + ) + + def _parse_date(self, entry: feedparser.FeedParserDict) -> Optional[datetime]: + """Parse published date from entry""" + # Try different date fields + for date_field in ["published_parsed", "updated_parsed", "created_parsed"]: + if date_field in entry and entry[date_field]: + try: + time_tuple = entry[date_field] + dt = datetime(*time_tuple[:6], tzinfo=timezone.utc) + return dt + except (TypeError, ValueError): + continue + + # Try parsing date strings + for date_field in ["published", "updated", "created"]: + if date_field in entry and entry[date_field]: + try: + return parsedate_to_datetime(entry[date_field]) + except (TypeError, ValueError): + continue + + return None + + async def fetch_all(self, sources: list[RSSSource]) -> list[Article]: + """ + Fetch articles from multiple RSS sources concurrently + + Args: + sources: List of RSS sources to fetch + + Returns: + Combined list of articles from all sources + """ + all_articles = [] + + for source in sources: + articles = await self.fetch(source) + all_articles.extend(articles) + + logger.info(f"Total articles fetched from all sources: {len(all_articles)}") + return all_articles diff --git a/src/ai/__init__.py b/src/ai/__init__.py new file mode 100644 index 0000000..c7ff796 --- /dev/null +++ b/src/ai/__init__.py @@ -0,0 +1 @@ +"""AI processing using OpenRouter""" diff --git a/src/ai/client.py b/src/ai/client.py new file mode 100644 index 0000000..78bb2f1 --- /dev/null +++ b/src/ai/client.py @@ -0,0 +1,117 @@ +"""OpenRouter API client""" + +import json +from typing import Any + +from openai import AsyncOpenAI + +from ..config import get_config +from ..logger import get_logger + +logger = get_logger() + + +class OpenRouterClient: + """Async client for OpenRouter API""" + + def __init__(self): + config = get_config() + env = config.env + + self.client = AsyncOpenAI( + base_url=config.ai.base_url, + api_key=env.openrouter_api_key, + default_headers={ + **({"HTTP-Referer": env.openrouter_site_url} if env.openrouter_site_url else {}), + **({"X-Title": env.openrouter_site_name} if env.openrouter_site_name else {}), + }, + ) + + self.model = config.ai.model + 
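+        # The AsyncOpenAI client above talks to OpenRouter's OpenAI-compatible endpoint (base_url);
+        # the same model from config.yaml's `ai.model` is reused for both filtering and summarization.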
logger.info(f"Initialized OpenRouter client with model: {self.model}") + + async def chat_completion( + self, + system_prompt: str, + user_prompt: str, + temperature: float = 0.7, + max_tokens: int = 1000, + json_mode: bool = False, + ) -> str: + """ + Send chat completion request + + Args: + system_prompt: System instruction + user_prompt: User message + temperature: Sampling temperature (0-2) + max_tokens: Maximum tokens to generate + json_mode: Enable JSON response format + + Returns: + Response content as string + """ + try: + messages = [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ] + + kwargs: dict[str, Any] = { + "model": self.model, + "messages": messages, + "temperature": temperature, + "max_tokens": max_tokens, + } + + if json_mode: + kwargs["response_format"] = {"type": "json_object"} + + response = await self.client.chat.completions.create(**kwargs) + + content = response.choices[0].message.content + if not content: + raise ValueError("Empty response from API") + + # Log token usage + if response.usage: + logger.debug( + f"Tokens used - Prompt: {response.usage.prompt_tokens}, " + f"Completion: {response.usage.completion_tokens}, " + f"Total: {response.usage.total_tokens}" + ) + + return content + + except Exception as e: + logger.error(f"OpenRouter API error: {e}") + raise + + async def chat_completion_json( + self, system_prompt: str, user_prompt: str, temperature: float = 0.3, max_tokens: int = 500 + ) -> dict[str, Any]: + """ + Send chat completion request expecting JSON response + + Args: + system_prompt: System instruction + user_prompt: User message + temperature: Sampling temperature + max_tokens: Maximum tokens to generate + + Returns: + Parsed JSON response as dictionary + """ + content = await self.chat_completion( + system_prompt=system_prompt, + user_prompt=user_prompt, + temperature=temperature, + max_tokens=max_tokens, + json_mode=True, + ) + + try: + return json.loads(content) + except json.JSONDecodeError as e: + logger.error(f"Failed to parse JSON response: {content}") + raise ValueError(f"Invalid JSON response: {e}") diff --git a/src/ai/filter.py b/src/ai/filter.py new file mode 100644 index 0000000..5e7e3ec --- /dev/null +++ b/src/ai/filter.py @@ -0,0 +1,118 @@ +"""Article relevance filtering using AI""" + +from typing import Optional + +from ..storage.models import Article +from ..config import get_config +from ..logger import get_logger +from .client import OpenRouterClient +from .prompts import FILTERING_SYSTEM_PROMPT, FILTERING_USER_PROMPT + +logger = get_logger() + + +class ArticleFilter: + """Filter articles by relevance using AI""" + + def __init__(self, client: OpenRouterClient): + self.client = client + config = get_config() + self.interests = config.ai.interests + self.min_score = config.ai.filtering.min_score + + async def score_article(self, article: Article) -> Optional[float]: + """ + Score article relevance (0-10) + + Args: + article: Article to score + + Returns: + Relevance score or None if scoring fails + """ + try: + # Prepare prompts + system_prompt = FILTERING_SYSTEM_PROMPT.format( + interests="\n".join(f"- {interest}" for interest in self.interests) + ) + + # Truncate content for API call + content_preview = article.content[:500] + ("..." 
if len(article.content) > 500 else "") + + user_prompt = FILTERING_USER_PROMPT.format( + title=article.title, + source=article.source, + category=article.category, + content=content_preview, + ) + + # Get score from AI + response = await self.client.chat_completion_json( + system_prompt=system_prompt, + user_prompt=user_prompt, + temperature=0.3, + max_tokens=200, + ) + + score = float(response.get("score", 0)) + reason = response.get("reason", "No reason provided") + + logger.debug(f"Article '{article.title}' scored {score:.1f}: {reason}") + + return score + + except Exception as e: + logger.error(f"Failed to score article '{article.title}': {e}") + return None + + async def is_relevant(self, article: Article) -> tuple[bool, Optional[float]]: + """ + Check if article meets relevance threshold + + Args: + article: Article to check + + Returns: + Tuple of (is_relevant, score) + """ + score = await self.score_article(article) + + if score is None: + return False, None + + is_relevant = score >= self.min_score + return is_relevant, score + + async def filter_articles( + self, articles: list[Article], max_articles: Optional[int] = None + ) -> list[tuple[Article, float]]: + """ + Filter and rank articles by relevance + + Args: + articles: Articles to filter + max_articles: Maximum number of articles to return + + Returns: + List of (article, score) tuples, sorted by score descending + """ + scored_articles: list[tuple[Article, float]] = [] + + for article in articles: + is_relevant, score = await self.is_relevant(article) + + if is_relevant and score is not None: + scored_articles.append((article, score)) + + # Sort by score descending + scored_articles.sort(key=lambda x: x[1], reverse=True) + + # Apply limit if specified + if max_articles: + scored_articles = scored_articles[:max_articles] + + logger.info( + f"Filtered {len(articles)} articles down to {len(scored_articles)} relevant ones" + ) + + return scored_articles diff --git a/src/ai/prompts.py b/src/ai/prompts.py new file mode 100644 index 0000000..f9ecbda --- /dev/null +++ b/src/ai/prompts.py @@ -0,0 +1,60 @@ +"""Prompt templates for AI processing""" + +FILTERING_SYSTEM_PROMPT = """You are a news relevance analyzer. Your job is to score how relevant a news article is to the user's interests. + +User Interests: +{interests} + +Score the article on a scale of 0-10 based on: +- Direct relevance to stated interests (0-4 points) +- Quality and depth of content (0-3 points) +- Timeliness and importance (0-3 points) + +Return ONLY a JSON object with this exact format: +{{"score": , "reason": ""}} + +Be strict - only highly relevant articles should score above 7.0.""" + +FILTERING_USER_PROMPT = """Article Title: {title} + +Source: {source} + +Category: {category} + +Content Preview: {content} + +Score this article's relevance (0-10) and explain why.""" + + +SUMMARIZATION_SYSTEM_PROMPT = """You are a technical news summarizer. Create concise, informative summaries of tech articles. + +Guidelines: +- Focus on key facts, findings, and implications +- Include important technical details +- Keep summaries to 2-3 sentences +- Use clear, professional language +- Highlight what makes this newsworthy + +Return ONLY the summary text, no additional formatting.""" + +SUMMARIZATION_USER_PROMPT = """Title: {title} + +Source: {source} + +Content: {content} + +Write a concise 2-3 sentence summary highlighting the key information.""" + + +BATCH_SUMMARIZATION_SYSTEM_PROMPT = """You are a technical news summarizer. Create concise summaries for multiple articles. 
+ +For each article, provide a 2-3 sentence summary that: +- Captures the key facts and findings +- Highlights technical details +- Explains why it's newsworthy + +Return a JSON array with this exact format: +[ + {{"id": "", "summary": "<2-3 sentence summary>"}}, + ... +]""" diff --git a/src/ai/summarizer.py b/src/ai/summarizer.py new file mode 100644 index 0000000..3913c76 --- /dev/null +++ b/src/ai/summarizer.py @@ -0,0 +1,72 @@ +"""Article summarization using AI""" + +from ..storage.models import Article +from ..logger import get_logger +from .client import OpenRouterClient +from .prompts import SUMMARIZATION_SYSTEM_PROMPT, SUMMARIZATION_USER_PROMPT + +logger = get_logger() + + +class ArticleSummarizer: + """Summarize articles using AI""" + + def __init__(self, client: OpenRouterClient): + self.client = client + + async def summarize(self, article: Article) -> str: + """ + Generate concise summary of article + + Args: + article: Article to summarize + + Returns: + Summary text (2-3 sentences) + """ + try: + # Truncate content if too long + max_content_length = 2000 + content = article.content + if len(content) > max_content_length: + content = content[:max_content_length] + "..." + + user_prompt = SUMMARIZATION_USER_PROMPT.format( + title=article.title, source=article.source, content=content + ) + + summary = await self.client.chat_completion( + system_prompt=SUMMARIZATION_SYSTEM_PROMPT, + user_prompt=user_prompt, + temperature=0.5, + max_tokens=300, + ) + + logger.debug(f"Summarized article: {article.title}") + return summary.strip() + + except Exception as e: + logger.error(f"Failed to summarize article '{article.title}': {e}") + # Fallback to original summary or truncated content + if article.summary: + return article.summary + return article.content[:200] + "..." 
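+    # NOTE: summarize_batch below calls summarize() once per article, sequentially, so each
+    # digest entry costs one chat-completion request.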
+ + async def summarize_batch(self, articles: list[Article]) -> dict[str, str]: + """ + Summarize multiple articles + + Args: + articles: List of articles to summarize + + Returns: + Dictionary mapping article IDs to summaries + """ + summaries = {} + + for article in articles: + summary = await self.summarize(article) + summaries[article.id] = summary + + logger.info(f"Summarized {len(summaries)} articles") + return summaries diff --git a/src/config.py b/src/config.py new file mode 100644 index 0000000..8623b21 --- /dev/null +++ b/src/config.py @@ -0,0 +1,156 @@ +"""Configuration management for News Agent""" + +import yaml +from pathlib import Path +from typing import Any + +from pydantic import BaseModel, Field, HttpUrl +from pydantic_settings import BaseSettings, SettingsConfigDict + + +class RSSSource(BaseModel): + """RSS feed source configuration""" + + name: str + url: HttpUrl + category: str + + +class AIFilteringConfig(BaseModel): + """AI filtering configuration""" + + enabled: bool = True + min_score: float = Field(ge=0, le=10, default=6.5) + max_articles: int = Field(ge=1, default=15) + + +class AIConfig(BaseModel): + """AI processing configuration""" + + provider: str = "openrouter" + base_url: str = "https://openrouter.ai/api/v1" + model: str = "google/gemini-flash-1.5" + filtering: AIFilteringConfig + interests: list[str] + + +class SMTPConfig(BaseModel): + """SMTP server configuration""" + + host: str = "localhost" + port: int = 25 + use_tls: bool = False + use_ssl: bool = False + username: str | None = None + password: str | None = None + + +class EmailConfig(BaseModel): + """Email configuration""" + + to: str + from_: str = Field(alias="from") + from_name: str = "Daily Tech News Agent" + subject_template: str = "Tech News Digest - {date}" + smtp: SMTPConfig + + +class ScheduleConfig(BaseModel): + """Schedule configuration""" + + time: str = "07:00" + timezone: str = "Europe/Oslo" + + +class DatabaseConfig(BaseModel): + """Database configuration""" + + path: str = "data/articles.db" + retention_days: int = 30 + + +class LoggingConfig(BaseModel): + """Logging configuration""" + + level: str = "INFO" + file: str = "data/logs/news-agent.log" + max_bytes: int = 10485760 + backup_count: int = 5 + + +class EnvSettings(BaseSettings): + """Environment variable settings""" + + model_config = SettingsConfigDict(env_file=".env", case_sensitive=False, extra="ignore") + + openrouter_api_key: str + openrouter_site_url: str | None = None + openrouter_site_name: str | None = None + smtp_username: str | None = None + smtp_password: str | None = None + error_notification_email: str | None = None + + +class Config: + """Main configuration manager""" + + def __init__(self, config_path: str | Path = "config.yaml"): + """Load configuration from YAML file and environment variables""" + self.config_path = Path(config_path) + + # Load YAML configuration + with open(self.config_path) as f: + self._config: dict[str, Any] = yaml.safe_load(f) + + # Load environment variables + self.env = EnvSettings() + + @property + def rss_sources(self) -> list[RSSSource]: + """Get all RSS sources""" + return [RSSSource(**src) for src in self._config["sources"]["rss"]] + + @property + def ai(self) -> AIConfig: + """Get AI configuration""" + return AIConfig(**self._config["ai"]) + + @property + def email(self) -> EmailConfig: + """Get email configuration""" + email_config = self._config["email"].copy() + + # Merge SMTP credentials from environment variables + if self.env.smtp_username: + 
email_config["smtp"]["username"] = self.env.smtp_username + if self.env.smtp_password: + email_config["smtp"]["password"] = self.env.smtp_password + + return EmailConfig(**email_config) + + @property + def schedule(self) -> ScheduleConfig: + """Get schedule configuration""" + return ScheduleConfig(**self._config["schedule"]) + + @property + def database(self) -> DatabaseConfig: + """Get database configuration""" + return DatabaseConfig(**self._config["database"]) + + @property + def logging(self) -> LoggingConfig: + """Get logging configuration""" + return LoggingConfig(**self._config["logging"]) + + +# Global config instance (lazy loaded) +_config: Config | None = None + + +def get_config() -> Config: + """Get or create global config instance""" + global _config + if _config is None: + _config = Config() + return _config diff --git a/src/email/__init__.py b/src/email/__init__.py new file mode 100644 index 0000000..49659e8 --- /dev/null +++ b/src/email/__init__.py @@ -0,0 +1 @@ +"""Email generation and sending""" diff --git a/src/email/generator.py b/src/email/generator.py new file mode 100644 index 0000000..cac840c --- /dev/null +++ b/src/email/generator.py @@ -0,0 +1,114 @@ +"""Email HTML generation from digest data""" + +from datetime import datetime +from pathlib import Path +from collections import defaultdict + +from jinja2 import Environment, FileSystemLoader +from premailer import transform + +from ..storage.models import DigestEntry +from ..logger import get_logger + +logger = get_logger() + + +class EmailGenerator: + """Generate HTML emails from digest data""" + + def __init__(self): + # Set up Jinja2 template environment + template_dir = Path(__file__).parent / "templates" + self.env = Environment(loader=FileSystemLoader(template_dir)) + + def generate_digest_email( + self, entries: list[DigestEntry], date_str: str, subject: str + ) -> tuple[str, str]: + """ + Generate HTML email for daily digest + + Args: + entries: List of digest entries (articles with summaries) + date_str: Date string for the digest + subject: Email subject line + + Returns: + Tuple of (html_content, text_content) + """ + # Group articles by category + articles_by_category = defaultdict(list) + for entry in entries: + articles_by_category[entry.category].append(entry) + + # Sort categories + sorted_categories = sorted(articles_by_category.keys()) + + # Get unique sources count + unique_sources = len(set(entry.article.source for entry in entries)) + + # Prepare template data + template_data = { + "title": subject, + "date": date_str, + "total_articles": len(entries), + "total_sources": unique_sources, + "total_categories": len(sorted_categories), + "articles_by_category": {cat: articles_by_category[cat] for cat in sorted_categories}, + } + + # Render HTML template + template = self.env.get_template("daily_digest.html") + html = template.render(**template_data) + + # Inline CSS for email compatibility + html_inlined = transform(html) + + # Generate plain text version + text = self._generate_text_version(entries, date_str, subject) + + logger.info(f"Generated email with {len(entries)} articles") + + return html_inlined, text + + def _generate_text_version( + self, entries: list[DigestEntry], date_str: str, subject: str + ) -> str: + """Generate plain text version of email""" + lines = [ + subject, + "=" * len(subject), + "", + f"Date: {date_str}", + f"Total Articles: {len(entries)}", + "", + "", + ] + + # Group by category + articles_by_category = defaultdict(list) + for entry in entries: + 
articles_by_category[entry.category].append(entry) + + # Output each category + for category in sorted(articles_by_category.keys()): + lines.append(f"{category.upper()}") + lines.append("-" * len(category)) + lines.append("") + + for entry in articles_by_category[category]: + article = entry.article + lines.append(f"• {article.title}") + lines.append(f" Source: {article.source}") + lines.append(f" Published: {article.published.strftime('%B %d, %Y at %H:%M')}") + lines.append(f" Relevance: {entry.relevance_score:.1f}/10") + lines.append(f" URL: {article.url}") + lines.append(f" Summary: {entry.ai_summary}") + lines.append("") + + lines.append("") + + lines.append("") + lines.append("---") + lines.append("Generated by News Agent | Powered by OpenRouter AI") + + return "\n".join(lines) diff --git a/src/email/sender.py b/src/email/sender.py new file mode 100644 index 0000000..8dbbed7 --- /dev/null +++ b/src/email/sender.py @@ -0,0 +1,77 @@ +"""Email sending via SMTP""" + +import smtplib +from email.mime.text import MIMEText +from email.mime.multipart import MIMEMultipart +from email.utils import formatdate + +from ..config import get_config +from ..logger import get_logger + +logger = get_logger() + + +class EmailSender: + """Send emails via SMTP""" + + def __init__(self): + config = get_config() + self.config = config.email + + def send(self, subject: str, html_content: str, text_content: str) -> bool: + """ + Send email with HTML and plain text versions + + Args: + subject: Email subject line + html_content: HTML email body + text_content: Plain text email body + + Returns: + True if sent successfully, False otherwise + """ + try: + # Create message + msg = MIMEMultipart("alternative") + msg["Subject"] = subject + msg["From"] = f"{self.config.from_name} <{self.config.from_}>" + msg["To"] = self.config.to + msg["Date"] = formatdate(localtime=True) + + # Attach parts + part_text = MIMEText(text_content, "plain", "utf-8") + part_html = MIMEText(html_content, "html", "utf-8") + + msg.attach(part_text) + msg.attach(part_html) + + # Send via SMTP + smtp_config = self.config.smtp + + if smtp_config.use_ssl: + server = smtplib.SMTP_SSL(smtp_config.host, smtp_config.port) + else: + server = smtplib.SMTP(smtp_config.host, smtp_config.port) + + try: + if smtp_config.use_tls and not smtp_config.use_ssl: + server.starttls() + + # Login if credentials provided + if smtp_config.username and smtp_config.password: + server.login(smtp_config.username, smtp_config.password) + + # Send email + server.send_message(msg) + logger.info(f"Email sent successfully to {self.config.to}") + return True + + finally: + server.quit() + + except smtplib.SMTPException as e: + logger.error(f"SMTP error sending email: {e}") + return False + except Exception as e: + logger.error(f"Unexpected error sending email: {e}") + return False diff --git a/src/email/templates/daily_digest.html b/src/email/templates/daily_digest.html new file mode 100644 index 0000000..5cb21f6 --- /dev/null +++ b/src/email/templates/daily_digest.html @@ -0,0 +1,194 @@ + + + + + + {{ title }} + + + +
+  <div class="header">
+    <h1>{{ title }}</h1>
+    <p>{{ date }}</p>
+  </div>
+
+  <div class="summary">
+    <p>{{ total_articles }} articles curated from your personalized news sources</p>
+    <p>{{ total_sources }} sources | {{ total_categories }} categories</p>
+  </div>
+
+  {% for category, articles in articles_by_category.items() %}
+  <div class="category">
+    <h2>{{ category }}</h2>
+    {% for entry in articles %}
+    <!-- per-article card: title, source, published date, relevance score, AI summary, link -->
+    {% endfor %}
+  </div>
+  {% endfor %}
+ + diff --git a/src/logger.py b/src/logger.py new file mode 100644 index 0000000..e249813 --- /dev/null +++ b/src/logger.py @@ -0,0 +1,54 @@ +"""Logging configuration for News Agent""" + +import logging +import sys +from logging.handlers import RotatingFileHandler +from pathlib import Path + +from .config import get_config + + +def setup_logger(name: str = "news-agent") -> logging.Logger: + """Setup and configure logger with file and console handlers""" + config = get_config() + log_config = config.logging + + # Create logger + logger = logging.getLogger(name) + logger.setLevel(getattr(logging, log_config.level.upper())) + + # Avoid duplicate handlers + if logger.handlers: + return logger + + # Create formatters + detailed_formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S" + ) + simple_formatter = logging.Formatter("%(levelname)s - %(message)s") + + # File handler with rotation + log_file = Path(log_config.file) + log_file.parent.mkdir(parents=True, exist_ok=True) + + file_handler = RotatingFileHandler( + log_file, maxBytes=log_config.max_bytes, backupCount=log_config.backup_count + ) + file_handler.setLevel(logging.DEBUG) + file_handler.setFormatter(detailed_formatter) + + # Console handler + console_handler = logging.StreamHandler(sys.stdout) + console_handler.setLevel(logging.INFO) + console_handler.setFormatter(simple_formatter) + + # Add handlers + logger.addHandler(file_handler) + logger.addHandler(console_handler) + + return logger + + +def get_logger(name: str = "news-agent") -> logging.Logger: + """Get or create logger instance""" + return logging.getLogger(name) diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..e10b911 --- /dev/null +++ b/src/main.py @@ -0,0 +1,153 @@ +"""Main orchestrator for News Agent""" + +import asyncio +from datetime import datetime + +from .config import get_config +from .logger import setup_logger, get_logger +from .storage.database import Database +from .storage.models import DigestEntry +from .aggregator.rss_fetcher import RSSFetcher +from .ai.client import OpenRouterClient +from .ai.filter import ArticleFilter +from .ai.summarizer import ArticleSummarizer +from .email.generator import EmailGenerator +from .email.sender import EmailSender + + +async def main(): + """Main execution flow""" + # Setup logging + setup_logger() + logger = get_logger() + + logger.info("=" * 60) + logger.info("News Agent starting...") + logger.info("=" * 60) + + try: + # Load configuration + config = get_config() + + # Initialize database + db = Database(config.database.path) + await db.initialize() + + # Clean up old articles + await db.cleanup_old_articles(config.database.retention_days) + + # Initialize RSS fetcher + fetcher = RSSFetcher() + + # Fetch articles from all sources + logger.info(f"Fetching from {len(config.rss_sources)} RSS sources...") + articles = await fetcher.fetch_all(config.rss_sources) + + if not articles: + logger.warning("No articles fetched from any source") + await fetcher.close() + return + + # Save articles to database (deduplication) + new_articles_count = await db.save_articles(articles) + logger.info( + f"Saved {new_articles_count} new articles (filtered {len(articles) - new_articles_count} duplicates)" + ) + + await fetcher.close() + + # Get unprocessed articles + unprocessed = await db.get_unprocessed_articles() + + if not unprocessed: + logger.info("No new articles to process") + return + + logger.info(f"Processing {len(unprocessed)} new articles with 
AI...") + + # Initialize AI components + ai_client = OpenRouterClient() + filter_ai = ArticleFilter(ai_client) + summarizer = ArticleSummarizer(ai_client) + + # Filter articles by relevance + logger.info("Filtering articles by relevance...") + filtered_articles = await filter_ai.filter_articles( + unprocessed, max_articles=config.ai.filtering.max_articles + ) + + if not filtered_articles: + logger.warning("No relevant articles found after filtering") + # Mark all as processed but not included + for article in unprocessed: + await db.update_article_processing( + article.id, relevance_score=0.0, ai_summary="", included=False + ) + return + + logger.info(f"Selected {len(filtered_articles)} relevant articles") + + # Summarize filtered articles + logger.info("Generating AI summaries...") + digest_entries = [] + + for article, score in filtered_articles: + summary = await summarizer.summarize(article) + + # Update database + await db.update_article_processing( + article.id, relevance_score=score, ai_summary=summary, included=True + ) + + # Create digest entry + entry = DigestEntry( + article=article, + relevance_score=score, + ai_summary=summary, + category=article.category, + ) + digest_entries.append(entry) + + # Mark remaining unprocessed articles as processed but not included + processed_ids = {article.id for article, _ in filtered_articles} + for article in unprocessed: + if article.id not in processed_ids: + await db.update_article_processing( + article.id, relevance_score=0.0, ai_summary="", included=False + ) + + # Generate email + logger.info("Generating email digest...") + generator = EmailGenerator() + + date_str = datetime.now().strftime("%A, %B %d, %Y") + subject = config.email.subject_template.format(date=date_str) + + html_content, text_content = generator.generate_digest_email( + digest_entries, date_str, subject + ) + + # Send email + logger.info("Sending email...") + sender = EmailSender() + success = sender.send(subject, html_content, text_content) + + if success: + logger.info("=" * 60) + logger.info(f"Daily digest sent successfully with {len(digest_entries)} articles!") + logger.info("=" * 60) + else: + logger.error("Failed to send email") + + except Exception as e: + logger.error(f"Fatal error in main execution: {e}", exc_info=True) + raise + + +def run(): + """Entry point for command-line execution""" + asyncio.run(main()) + + +if __name__ == "__main__": + run() diff --git a/src/storage/__init__.py b/src/storage/__init__.py new file mode 100644 index 0000000..c78383b --- /dev/null +++ b/src/storage/__init__.py @@ -0,0 +1 @@ +"""Database storage for articles""" diff --git a/src/storage/database.py b/src/storage/database.py new file mode 100644 index 0000000..43d90c8 --- /dev/null +++ b/src/storage/database.py @@ -0,0 +1,194 @@ +"""SQLite database operations for article storage and deduplication""" + +import aiosqlite +import json +from datetime import datetime, timedelta +from pathlib import Path +from typing import Optional + +from .models import Article +from ..logger import get_logger + +logger = get_logger() + + +class Database: + """Async SQLite database manager""" + + def __init__(self, db_path: str | Path): + self.db_path = Path(db_path) + self.db_path.parent.mkdir(parents=True, exist_ok=True) + + async def initialize(self): + """Create database tables if they don't exist""" + async with aiosqlite.connect(self.db_path) as db: + await db.execute( + """ + CREATE TABLE IF NOT EXISTS articles ( + id TEXT PRIMARY KEY, + url TEXT NOT NULL UNIQUE, + title TEXT NOT NULL, + 
summary TEXT, + content TEXT NOT NULL, + published TEXT NOT NULL, + source TEXT NOT NULL, + category TEXT NOT NULL, + fetched_at TEXT NOT NULL, + relevance_score REAL, + ai_summary TEXT, + processed INTEGER DEFAULT 0, + included_in_digest INTEGER DEFAULT 0 + ) + """ + ) + + await db.execute( + """ + CREATE INDEX IF NOT EXISTS idx_published + ON articles(published) + """ + ) + + await db.execute( + """ + CREATE INDEX IF NOT EXISTS idx_fetched_at + ON articles(fetched_at) + """ + ) + + await db.commit() + + logger.info(f"Database initialized at {self.db_path}") + + async def article_exists(self, article_id: str) -> bool: + """Check if article already exists in database""" + async with aiosqlite.connect(self.db_path) as db: + async with db.execute("SELECT 1 FROM articles WHERE id = ?", (article_id,)) as cursor: + result = await cursor.fetchone() + return result is not None + + async def save_article(self, article: Article) -> bool: + """Save article to database. Returns True if saved, False if duplicate""" + if await self.article_exists(article.id): + logger.debug(f"Article already exists: {article.title}") + return False + + async with aiosqlite.connect(self.db_path) as db: + await db.execute( + """ + INSERT INTO articles ( + id, url, title, summary, content, published, source, + category, fetched_at, relevance_score, ai_summary, + processed, included_in_digest + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + article.id, + str(article.url), + article.title, + article.summary, + article.content, + article.published.isoformat(), + article.source, + article.category, + article.fetched_at.isoformat(), + article.relevance_score, + article.ai_summary, + int(article.processed), + int(article.included_in_digest), + ), + ) + await db.commit() + + logger.debug(f"Saved article: {article.title}") + return True + + async def save_articles(self, articles: list[Article]) -> int: + """Save multiple articles. Returns count of new articles saved""" + count = 0 + for article in articles: + if await self.save_article(article): + count += 1 + return count + + async def get_unprocessed_articles(self, limit: Optional[int] = None) -> list[Article]: + """Get articles that haven't been processed by AI yet""" + query = """ + SELECT * FROM articles + WHERE processed = 0 + ORDER BY published DESC + """ + if limit: + query += f" LIMIT {limit}" + + async with aiosqlite.connect(self.db_path) as db: + db.row_factory = aiosqlite.Row + async with db.execute(query) as cursor: + rows = await cursor.fetchall() + return [self._row_to_article(row) for row in rows] + + async def update_article_processing( + self, article_id: str, relevance_score: float, ai_summary: str, included: bool + ): + """Update article with AI processing results""" + async with aiosqlite.connect(self.db_path) as db: + await db.execute( + """ + UPDATE articles + SET relevance_score = ?, + ai_summary = ?, + processed = 1, + included_in_digest = ? + WHERE id = ? + """, + (relevance_score, ai_summary, int(included), article_id), + ) + await db.commit() + + async def get_todays_digest_articles(self) -> list[Article]: + """Get all articles included in today's digest""" + today = datetime.now().date() + async with aiosqlite.connect(self.db_path) as db: + db.row_factory = aiosqlite.Row + async with db.execute( + """ + SELECT * FROM articles + WHERE included_in_digest = 1 + AND date(fetched_at) = ? 
+ ORDER BY relevance_score DESC, published DESC + """, + (today.isoformat(),), + ) as cursor: + rows = await cursor.fetchall() + return [self._row_to_article(row) for row in rows] + + async def cleanup_old_articles(self, retention_days: int): + """Delete articles older than retention period""" + cutoff_date = datetime.now() - timedelta(days=retention_days) + async with aiosqlite.connect(self.db_path) as db: + cursor = await db.execute( + "DELETE FROM articles WHERE fetched_at < ?", (cutoff_date.isoformat(),) + ) + deleted = cursor.rowcount + await db.commit() + + if deleted > 0: + logger.info(f"Cleaned up {deleted} old articles") + + def _row_to_article(self, row: aiosqlite.Row) -> Article: + """Convert database row to Article model""" + return Article( + id=row["id"], + url=row["url"], + title=row["title"], + summary=row["summary"], + content=row["content"], + published=datetime.fromisoformat(row["published"]), + source=row["source"], + category=row["category"], + fetched_at=datetime.fromisoformat(row["fetched_at"]), + relevance_score=row["relevance_score"], + ai_summary=row["ai_summary"], + processed=bool(row["processed"]), + included_in_digest=bool(row["included_in_digest"]), + ) diff --git a/src/storage/models.py b/src/storage/models.py new file mode 100644 index 0000000..d039038 --- /dev/null +++ b/src/storage/models.py @@ -0,0 +1,44 @@ +"""Data models for articles and digests""" + +from datetime import datetime +from typing import Optional + +from pydantic import BaseModel, HttpUrl + + +class Article(BaseModel): + """Article data model""" + + id: str # Hash of URL + url: HttpUrl + title: str + summary: Optional[str] = None + content: str + published: datetime + source: str + category: str + fetched_at: datetime + + # AI processing fields + relevance_score: Optional[float] = None + ai_summary: Optional[str] = None + processed: bool = False + included_in_digest: bool = False + + +class DigestEntry(BaseModel): + """Entry in daily digest""" + + article: Article + relevance_score: float + ai_summary: str + category: str + + +class DailyDigest(BaseModel): + """Complete daily digest""" + + date: str + entries: list[DigestEntry] + total_articles_processed: int + total_articles_included: int diff --git a/src/utils/__init__.py b/src/utils/__init__.py new file mode 100644 index 0000000..dac6bf2 --- /dev/null +++ b/src/utils/__init__.py @@ -0,0 +1 @@ +"""Utility functions""" diff --git a/systemd/news-agent.service b/systemd/news-agent.service new file mode 100644 index 0000000..35cba9d --- /dev/null +++ b/systemd/news-agent.service @@ -0,0 +1,29 @@ +[Unit] +Description=News Agent - Daily Tech News Aggregator +After=network-online.target +Wants=network-online.target + +[Service] +Type=oneshot +User=%u +WorkingDirectory=/home/%u/news-agent +Environment="PATH=/home/%u/news-agent/.venv/bin:/usr/local/bin:/usr/bin" + +# Run the news agent +ExecStart=/home/%u/news-agent/.venv/bin/python -m src.main + +# Logging +StandardOutput=journal +StandardError=journal +SyslogIdentifier=news-agent + +# Resource limits +MemoryLimit=512M +CPUQuota=50% + +# Restart policy (only on failure) +Restart=on-failure +RestartSec=5min + +[Install] +WantedBy=default.target diff --git a/systemd/news-agent.timer b/systemd/news-agent.timer new file mode 100644 index 0000000..f311caa --- /dev/null +++ b/systemd/news-agent.timer @@ -0,0 +1,15 @@ +[Unit] +Description=News Agent Daily Timer +Requires=news-agent.service + +[Timer] +# Run daily at 07:00 Europe/Oslo time +OnCalendar=07:00 +Persistent=true + +# If system was off at 
trigger time, Persistent=true runs it shortly after the next boot +# OnBootSec additionally fires the timer five minutes after every boot +OnBootSec=5min + +[Install] +WantedBy=timers.target
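
The unit files above assume a checkout at `/home/<user>/news-agent` and use the `%u` specifier, which fits a per-user installation. A minimal sketch of installing and verifying them as user units, assuming that path matches your checkout (only the file names come from this commit; everything else is illustrative):

```bash
# Install the timer and service as user units (paths assumed to match the checkout)
mkdir -p ~/.config/systemd/user
cp systemd/news-agent.service systemd/news-agent.timer ~/.config/systemd/user/
systemctl --user daemon-reload
systemctl --user enable --now news-agent.timer

# Allow user timers to fire without an active login session
loginctl enable-linger "$USER"

# Confirm the next scheduled run
systemctl --user list-timers news-agent.timer

# Trigger a one-off run and follow its journal output
systemctl --user start news-agent.service
journalctl --user -u news-agent.service -f
```

If you install the units system-wide instead, replace `%u` and the `/home/%u/...` paths with a concrete username. Also note that `OnCalendar=07:00` fires at 07:00 in the host's local timezone, so the Europe/Oslo comment only holds if the machine is set to that zone (recent systemd versions also accept a timezone appended to the calendar expression).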