# jaeswift-website/api/sitrep_generator.py
# (475 lines, 18 KiB, Python)

#!/usr/bin/env python3
"""SITREP Generator — Automated Daily AI Briefing for jaeswift.xyz
Fetches tech news from RSS/JSON feeds and crypto data from Binance,
sends to Venice AI for military-style summarisation, saves as JSON.
Usage:
python3 sitrep_generator.py # Generate today's SITREP
python3 sitrep_generator.py 2026-04-06 # Generate for specific date
"""
import json
import os
import sys
import time
import logging
from datetime import datetime, timezone
from pathlib import Path
import feedparser
import requests
# ─── Configuration ────────────────────────────────────
BASE_DIR = Path(__file__).parent
DATA_DIR = BASE_DIR / 'data'           # runtime data root, next to this script
SITREP_DIR = DATA_DIR / 'sitreps'      # one JSON file per day is written here
KEYS_FILE = DATA_DIR / 'apikeys.json'  # Venice AI credentials (key + model)
# Import-time side effect: guarantee the output directory exists.
SITREP_DIR.mkdir(parents=True, exist_ok=True)
# Logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [SITREP] %(levelname)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
log = logging.getLogger('sitrep')
# Request headers
HEADERS = {
    'User-Agent': 'JAESWIFT-SITREP/1.0 (https://jaeswift.xyz)'
}
# NOTE(review): REDDIT_HEADERS is defined but never used in this file —
# fetch_reddit_posts() calls feedparser.parse() without custom headers.
REDDIT_HEADERS = {
    'User-Agent': 'JAESWIFT-SITREP/1.0 by jaeswift'
}
# ─── Feed Sources ─────────────────────────────────────
# Each entry: 'url' (feed location), 'type' (parser hint), 'label' (the
# 'source' string stamped onto every story dict built from the feed).
RSS_FEEDS = {
    'hackernews': {
        'url': 'https://hnrss.org/frontpage?count=50',
        'type': 'rss',
        'label': 'Hacker News'
    }
}
REDDIT_FEEDS = {
    'r_technology': {
        'url': 'https://www.reddit.com/r/technology/hot.rss?limit=30',
        'type': 'rss',
        'label': 'r/technology'
    },
    'r_programming': {
        'url': 'https://www.reddit.com/r/programming/hot.rss?limit=30',
        'type': 'rss',
        'label': 'r/programming'
    },
    'r_netsec': {
        'url': 'https://www.reddit.com/r/netsec/hot.rss?limit=20',
        'type': 'rss',
        'label': 'r/netsec'
    }
}
# Lobste.rs exposes its hottest stories as plain JSON (no RSS parsing needed).
LOBSTERS_URL = 'https://lobste.rs/hottest.json'
# USDT-quoted spot pairs polled from Binance's 24h ticker endpoint.
CRYPTO_SYMBOLS = ['SOLUSDT', 'BTCUSDT', 'ETHUSDT']
BINANCE_TICKER_URL = 'https://api.binance.com/api/v3/ticker/24hr'
# ─── Fetch Functions ──────────────────────────────────
def fetch_hn_stories():
    """Fetch Hacker News top stories via RSS."""
    stories = []
    try:
        parsed = feedparser.parse(RSS_FEEDS['hackernews']['url'])
        for item in parsed.entries:
            # feedparser exposes publish time as a struct_time when present.
            stamp = getattr(item, 'published_parsed', None)
            published = time.strftime('%Y-%m-%dT%H:%M:%SZ', stamp) if stamp else ''
            record = {
                'title': item.get('title', 'Untitled'),
                'url': item.get('link', ''),
                'source': 'Hacker News',
                'published': published,
                'summary': (item.get('summary', '') or '')[:300],
            }
            stories.append(record)
        log.info(f'Fetched {len(stories)} stories from Hacker News')
    except Exception as e:
        # Best-effort: a failed fetch logs and yields whatever was collected.
        log.error(f'HN fetch error: {e}')
    return stories
def fetch_reddit_posts(sub_key, sub_config):
    """Fetch Reddit posts via RSS feed."""
    posts = []
    try:
        feed = feedparser.parse(sub_config['url'])
        for item in feed.entries:
            # Prefer the published timestamp; Reddit atom feeds sometimes
            # only carry an updated timestamp.
            stamp = (getattr(item, 'published_parsed', None)
                     or getattr(item, 'updated_parsed', None))
            posts.append({
                'title': item.get('title', 'Untitled'),
                'url': item.get('link', ''),
                'source': sub_config['label'],
                'published': time.strftime('%Y-%m-%dT%H:%M:%SZ', stamp) if stamp else '',
                'summary': (item.get('summary', '') or '')[:300],
            })
        log.info(f"Fetched {len(posts)} posts from {sub_config['label']}")
    except Exception as e:
        # Best-effort: log and return whatever was collected so far.
        log.error(f"Reddit fetch error ({sub_config['label']}): {e}")
    return posts
def fetch_lobsters():
    """Fetch Lobste.rs hottest stories via JSON."""
    stories = []
    try:
        response = requests.get(LOBSTERS_URL, headers=HEADERS, timeout=15)
        response.raise_for_status()
        payload = response.json()
        # Cap at 30 entries; text-only submissions have no 'url', so fall
        # back to the comments page link.
        for entry in payload[:30]:
            record = {
                'title': entry.get('title', 'Untitled'),
                'url': entry.get('url', '') or entry.get('comments_url', ''),
                'source': 'Lobste.rs',
                'published': entry.get('created_at', ''),
                'summary': (entry.get('description', '') or '')[:300],
                'score': entry.get('score', 0),
                'tags': entry.get('tags', []),
            }
            stories.append(record)
        log.info(f'Fetched {len(stories)} stories from Lobste.rs')
    except Exception as e:
        # Best-effort: log and return whatever was collected so far.
        log.error(f'Lobsters fetch error: {e}')
    return stories
def fetch_crypto_data():
    """Fetch crypto market data from Binance."""
    crypto = {}
    for symbol in CRYPTO_SYMBOLS:
        # Map e.g. 'BTCUSDT' -> 'BTC' for the output key.
        ticker = symbol.replace('USDT', '')
        try:
            resp = requests.get(
                BINANCE_TICKER_URL,
                params={'symbol': symbol},
                headers=HEADERS,
                timeout=10
            )
            resp.raise_for_status()
            payload = resp.json()

            def to_price(field):
                # Binance returns numeric fields as strings; normalise to
                # a float rounded to 2 decimal places.
                return round(float(payload.get(field, 0)), 2)

            crypto[ticker] = {
                'price': to_price('lastPrice'),
                'change': to_price('priceChangePercent'),
                'high_24h': to_price('highPrice'),
                'low_24h': to_price('lowPrice'),
                'volume': to_price('volume'),
            }
            log.info(f"{ticker}: ${crypto[ticker]['price']} ({crypto[ticker]['change']}%)")
        except Exception as e:
            # Deliberate fallback: keep the ticker present with zeroed
            # fields so downstream formatting never KeyErrors.
            log.error(f'Binance fetch error ({symbol}): {e}')
            crypto[ticker] = {'price': 0, 'change': 0, 'high_24h': 0, 'low_24h': 0, 'volume': 0}
    return crypto
# ─── AI Generation ────────────────────────────────────
def build_ai_prompt(all_stories, crypto):
    """Build the raw-intelligence prompt string for Venice AI.

    Args:
        all_stories: list of story dicts with 'title', 'url', 'source',
            'published', 'summary' (Lobste.rs entries also carry 'score').
        crypto: mapping of ticker -> dict with 'price', 'change',
            'high_24h', 'low_24h', 'volume'.

    Returns:
        One newline-joined string: stories grouped by source, a
        security-focused section, and the crypto market data.
    """
    # Single timestamp so Date and Time lines cannot straddle midnight.
    now = datetime.now(timezone.utc)
    lines = []
    lines.append('=== RAW INTELLIGENCE DATA FOR SITREP GENERATION ===')
    lines.append(f'Date: {now.strftime("%d %B %Y")}')
    lines.append(f'Time: {now.strftime("%H%M")} HRS UTC')
    lines.append('')
    # Group stories by their source label.
    sources = {}
    for s in all_stories:
        sources.setdefault(s.get('source', 'Unknown'), []).append(s)
    lines.append('=== TECHNOLOGY & PROGRAMMING FEEDS ===')
    for src_name in ['Hacker News', 'r/technology', 'r/programming', 'Lobste.rs']:
        if src_name in sources:
            lines.append(f'\n--- {src_name.upper()} ---')
            for i, s in enumerate(sources[src_name][:25], 1):
                score_str = f" [score:{s.get('score', '?')}]" if s.get('score') else ''
                url_str = f"\n Link: {s['url']}" if s.get('url') else ''
                lines.append(f"{i}. {s['title']}{score_str}{url_str}")
                if s.get('summary'):
                    lines.append(f" Summary: {s['summary'][:150]}")
    lines.append('\n=== CYBERSECURITY FEEDS ===')
    if 'r/netsec' in sources:
        lines.append('--- R/NETSEC ---')
        for i, s in enumerate(sources['r/netsec'][:15], 1):
            url_str = f"\n Link: {s['url']}" if s.get('url') else ''
            lines.append(f"{i}. {s['title']}{url_str}")
            if s.get('summary'):
                lines.append(f" Summary: {s['summary'][:150]}")
    # Also extract security-related stories from other feeds.
    # Keywords are matched against lowercased titles, so every keyword
    # must itself be lowercase ('CVE' could never match — fixed to 'cve').
    sec_keywords = ['security', 'vulnerability', 'exploit', 'hack', 'breach',
                    'malware', 'ransomware', 'cve', 'zero-day', 'phishing',
                    'encryption', 'privacy', 'backdoor', 'patch', 'attack']
    sec_stories = []
    for s in all_stories:
        if s.get('source') == 'r/netsec':
            continue
        title_lower = s.get('title', '').lower()
        if any(kw in title_lower for kw in sec_keywords):
            sec_stories.append(s)
    if sec_stories:
        lines.append('\n--- SECURITY-RELATED FROM OTHER FEEDS ---')
        for i, s in enumerate(sec_stories[:10], 1):
            lines.append(f"{i}. [{s['source']}] {s['title']}")
            if s.get('url'):
                lines.append(f" Link: {s['url']}")
    lines.append('\n=== CRYPTO MARKET DATA ===')
    for ticker, data in crypto.items():
        # Original conditional assigned '' on both branches (garbled
        # glyphs); restored to the 🟢/🔴 markers used by the fallback SITREP.
        direction = '🟢' if data['change'] >= 0 else '🔴'
        lines.append(
            f"{ticker}: ${data['price']:,.2f} {direction} {data['change']:+.2f}% "
            f"| 24h High: ${data['high_24h']:,.2f} | 24h Low: ${data['low_24h']:,.2f} "
            f"| Volume: {data['volume']:,.0f}"
        )
    return '\n'.join(lines)
# System prompt sent verbatim to Venice AI. It fixes the analyst persona,
# the exact markdown section structure, and the sourcing/link rules.
# NOTE(review): the site's SITREP renderer presumably depends on these exact
# headings and the link format — confirm before editing any of this text.
SITREP_SYSTEM_PROMPT = """You are a military intelligence analyst preparing a classified daily situation report (SITREP) for a special operations technology unit. Your callsign is JAE-SIGINT.
Write in terse, professional military briefing style. Use abbreviations common to military communications. Be direct, analytical, and occasionally darkly witty.
Format the SITREP in markdown with the following EXACT structure:
# DAILY SITREP — [DATE]
**CLASSIFICATION: OPEN SOURCE // JAESWIFT SIGINT**
**DTG:** [Date-Time Group in military format, e.g., 060700ZAPR2026]
**PREPARED BY:** JAE-SIGINT / AUTOMATED COLLECTION
---
## SECTOR ALPHA — TECHNOLOGY
Summarise the top 8-10 most significant technology stories. Group loosely by theme (AI/ML, infrastructure, programming languages, open source, industry moves). Each item should be 1-2 sentences max. Use bullet points. Prioritise stories by significance and novelty. After each bullet point, on a new line add a small source link in this exact format: ` *[source — read more](URL)*` using the Link URL provided in the raw data.
---
## SECTOR BRAVO — CYBERSECURITY
Summarise any cybersecurity-related stories. Include CVEs, breaches, new tools, threat intel. If few dedicated security stories, note the relatively quiet SIGINT environment. 4-8 items. After each bullet point, on a new line add a small source link in this exact format: ` *[source — read more](URL)*` using the Link URL provided in the raw data.
---
## SECTOR CHARLIE — CRYPTO MARKETS
Report crypto prices with 24h movement. Note any significant moves (>5% change). Include any crypto-related news from the feeds. Brief market sentiment.
---
## ASSESSMENT
2-3 sentences providing overall analysis. What trends are emerging? What should the operator be watching? Include one forward-looking statement.
---
**// END TRANSMISSION //**
**NEXT SCHEDULED SITREP: [TOMORROW'S DATE] 0700Z**
Rules:
- Keep total length under 1500 words
- Do NOT invent stories or data — only summarise what's provided
- If data is sparse for a sector, acknowledge it briefly
- Use markdown formatting (headers, bold, bullets, horizontal rules)
- Include the exact crypto prices provided — do not round them differently
- For each tech/security story, mention the source in brackets like [HN] [Reddit] [Lobsters]
- CRITICAL: After EVERY bullet point story in SECTOR ALPHA and SECTOR BRAVO, include the original source link on a NEW indented line formatted exactly as: *[source — read more](URL)*
- Use the Link URLs provided in the raw intelligence data — NEVER invent or guess URLs"""
def generate_with_venice(user_prompt):
    """Call Venice AI to generate the SITREP."""
    try:
        venice_cfg = json.loads(KEYS_FILE.read_text()).get('venice', {})
        venice_key = venice_cfg.get('api_key', '')
        venice_model = venice_cfg.get('model', 'llama-3.3-70b')
        if not venice_key:
            log.error('Venice API key not found in apikeys.json')
            return None, None
        log.info(f'Calling Venice AI (model: {venice_model})...')
        request_body = {
            'model': venice_model,
            'messages': [
                {'role': 'system', 'content': SITREP_SYSTEM_PROMPT},
                {'role': 'user', 'content': user_prompt},
            ],
            'max_tokens': 2048,
            'temperature': 0.7,
        }
        auth_headers = {
            'Authorization': f'Bearer {venice_key}',
            'Content-Type': 'application/json',
        }
        resp = requests.post(
            'https://api.venice.ai/api/v1/chat/completions',
            headers=auth_headers,
            json=request_body,
            timeout=60,
        )
        resp.raise_for_status()
        content = resp.json()['choices'][0]['message']['content']
        log.info(f'Venice AI response received ({len(content)} chars)')
        return content, venice_model
    except Exception as e:
        # Any failure (missing key file, HTTP error, bad payload) degrades
        # to (None, None); the caller falls back to the raw SITREP.
        log.error(f'Venice AI error: {e}')
        return None, None
def generate_headline(content):
    """Extract or generate a one-line headline from the SITREP content.

    Scans for the first bullet point longer than 20 characters, trims it
    to at most 120 characters (117 + '...'), and falls back to a generic
    headline when no suitable bullet exists.
    """
    for line in content.split('\n'):
        stripped = line.strip()
        if stripped.startswith('- ') or stripped.startswith('* '):
            # Remove only the 2-char bullet marker. The previous
            # lstrip('-* ') stripped a *character set*, which also ate the
            # leading '**' of markdown-bold bullets and left mismatched
            # bold markers in the headline.
            headline = stripped[2:].strip()
            if len(headline) > 20:
                # Trim to a reasonable length for display.
                if len(headline) > 120:
                    headline = headline[:117] + '...'
                return headline
    return 'Daily intelligence briefing — technology, cybersecurity, and crypto markets'
def build_fallback_sitrep(all_stories, crypto):
    """Build a raw fallback SITREP when Venice AI is unavailable.

    Mirrors the sector structure the AI is asked to produce, but emits
    raw headlines only.

    Args:
        all_stories: list of story dicts with at least 'source' and 'title'.
        crypto: mapping of ticker -> dict with 'price' and 'change'.

    Returns:
        A markdown string for the saved SITREP 'content' field.
    """
    now = datetime.now(timezone.utc)
    lines = []
    lines.append(f'# DAILY SITREP — {now.strftime("%d %B %Y").upper()}')
    lines.append('**CLASSIFICATION: OPEN SOURCE // JAESWIFT SIGINT**')
    lines.append(f'**DTG:** {now.strftime("%d%H%MZ%b%Y").upper()}')
    lines.append('**PREPARED BY:** JAE-SIGINT / RAW FEED (AI UNAVAILABLE)')
    lines.append('')
    lines.append('---')
    lines.append('')
    lines.append('> ⚠️ **NOTICE:** AI summarisation unavailable. Raw intelligence feed follows.')
    lines.append('')
    lines.append('## SECTOR ALPHA — TECHNOLOGY')
    lines.append('')
    # Everything except r/netsec counts as a technology story here.
    tech_stories = [s for s in all_stories if s.get('source') != 'r/netsec']
    for s in tech_stories[:15]:
        lines.append(f"- **[{s['source']}]** {s['title']}")
    lines.append('')
    lines.append('---')
    lines.append('')
    lines.append('## SECTOR BRAVO — CYBERSECURITY')
    lines.append('')
    sec_stories = [s for s in all_stories if s.get('source') == 'r/netsec']
    # Keywords are matched against lowercased titles, so every keyword must
    # itself be lowercase ('CVE' could never match — fixed to 'cve').
    sec_keywords = ['security', 'vulnerability', 'exploit', 'hack', 'breach',
                    'malware', 'ransomware', 'cve', 'zero-day', 'phishing']
    for s in all_stories:
        if s.get('source') != 'r/netsec' and any(kw in s.get('title', '').lower() for kw in sec_keywords):
            sec_stories.append(s)
    for s in sec_stories[:10]:
        lines.append(f"- **[{s['source']}]** {s['title']}")
    if not sec_stories:
        lines.append('- No cybersecurity stories intercepted this cycle.')
    lines.append('')
    lines.append('---')
    lines.append('')
    lines.append('## SECTOR CHARLIE — CRYPTO MARKETS')
    lines.append('')
    for ticker, data in crypto.items():
        direction = '🟢' if data['change'] >= 0 else '🔴'
        lines.append(
            f"- **{ticker}**: ${data['price']:,.2f} {direction} {data['change']:+.2f}%"
        )
    lines.append('')
    lines.append('---')
    lines.append('')
    lines.append('## ASSESSMENT')
    lines.append('')
    lines.append('AI analysis unavailable. Operator should review raw feed data above for emerging patterns.')
    lines.append('')
    lines.append('---')
    lines.append('')
    lines.append('**// END TRANSMISSION //**')
    return '\n'.join(lines)
# ─── Main Generation Pipeline ────────────────────────
def generate_sitrep(target_date=None):
    """Main pipeline: fetch data → AI summarise → save JSON."""
    now = datetime.now(timezone.utc)
    date_str = target_date or now.strftime('%Y-%m-%d')
    output_path = SITREP_DIR / f'{date_str}.json'
    log.info(f'=== SITREP GENERATION STARTED for {date_str} ===')
    # Phase 1: collect stories and market data from every source.
    log.info('Phase 1: Fetching intelligence data...')
    all_stories = fetch_hn_stories()
    for feed_key, feed_cfg in REDDIT_FEEDS.items():
        all_stories.extend(fetch_reddit_posts(feed_key, feed_cfg))
    all_stories.extend(fetch_lobsters())
    crypto = fetch_crypto_data()
    total_sources = len(all_stories)
    log.info(f'Total stories collected: {total_sources}')
    if not all_stories:
        log.error('No stories fetched from any source. Aborting.')
        return False
    # Phase 2: ask Venice AI for the briefing.
    log.info('Phase 2: Generating AI briefing...')
    ai_content, model_used = generate_with_venice(build_ai_prompt(all_stories, crypto))
    # Phase 3: use the AI output, or degrade to the raw fallback SITREP.
    if ai_content:
        content = ai_content
        headline = generate_headline(content)
    else:
        log.warning('Venice AI failed — using fallback raw SITREP')
        content = build_fallback_sitrep(all_stories, crypto)
        headline = 'Raw intelligence feed — AI summarisation unavailable'
        model_used = 'fallback'
    # Phase 4: persist the day's SITREP as JSON.
    record = {
        'date': date_str,
        'generated_at': now.strftime('%Y-%m-%dT%H:%M:%SZ'),
        'headline': headline,
        'content': content,
        'crypto': crypto,
        'sources_used': total_sources,
        'model': model_used or 'unknown',
    }
    output_path.write_text(json.dumps(record, indent=2))
    log.info(f'SITREP saved to {output_path}')
    log.info(f'=== SITREP GENERATION COMPLETE for {date_str} ===')
    return True
if __name__ == '__main__':
    # Optional CLI argument: target date (YYYY-MM-DD); defaults to today.
    cli_date = sys.argv[1] if len(sys.argv) > 1 else None
    sys.exit(0 if generate_sitrep(cli_date) else 1)