#!/usr/bin/env python3
"""AWESOMELIST Auto-Sync — Pulls latest trackawesomelist data and rebuilds JSON"""
import os, re, json, subprocess, sys
from datetime import datetime
from pathlib import Path

# Upstream repository that is cloned on the first run (see pull_repo).
REPO_URL = "https://github.com/trackawesomelist/trackawesomelist.git"
# Local checkout of REPO_URL; build_data reads its "content" subdirectory.
REPO_DIR = "/opt/awesomelist-source"
# Directory receiving one "<org>--<repo>.json" file per parsed list.
OUTPUT_DIR = "/var/www/jaeswift-homepage/api/data/awesomelist"
# Aggregated sector index written at the end of build_data.
OUTPUT_INDEX = "/var/www/jaeswift-homepage/api/data/awesomelist_index.json"
# File that log() appends every message to.
LOG = "/var/log/awesomelist-sync.log"

# Sector table: sector code -> display name, icon and the tags used by
# classify_list to assign a list to that sector. build_data copies each
# entry (including 'tags') into the generated index.
#
# Order matters: classify_list walks this dict in insertion order and returns
# the first sector with a matching tag, so earlier sectors take priority.
# Several tags are listed under more than one sector (e.g. 'dos', 'npm',
# 'webpack', 'gulp', 'microservice', 'mental-health', 'pixel-art'); for those
# the earlier sector always wins.
SECTOR_MAP = {
    'PRP-001': {'name': 'PROGRAMMING LANGUAGES', 'icon': '💻', 'tags': ['python', 'go', 'rust', 'javascript', 'typescript', 'ruby', 'java', 'kotlin', 'swift', 'dart', 'elixir', 'erlang', 'haskell', 'lua', 'perl', 'php', 'scala', 'clojure', 'crystal', 'nim', 'zig', 'v', 'ocaml', 'r', 'julia', 'fortran', 'pascal', 'ada', 'c', 'cpp', 'csharp', 'fsharp', 'groovy', 'elm', 'purescript', 'idris', 'coq', 'vala', 'actionscript', 'autohotkey', 'autoit', 'commonlisp', 'clojurescript', 'qsharp', 'd', 'eta', 'frege']},
    'PRP-002': {'name': 'WEB FRONTEND', 'icon': '🌐', 'tags': ['react', 'vue', 'angular', 'svelte', 'css', 'html', 'tailwind', 'bootstrap', 'sass', 'less', 'webpack', 'vite', 'nextjs', 'nuxt', 'gatsby', 'preact', 'ember', 'backbone', 'knockout', 'cyclejs', 'choo', 'mithril', 'polymer', 'lit', 'storybook', 'draft-js', 'redux', 'relay', 'graphql', 'webcomponent', 'ant-design', 'material-ui', 'flexbox', 'web-animation', 'motion-ui', 'progressive-web', 'service-worker', 'web-extension', 'browserify', 'yew', 'seed-rs', 'aurelia', 'marionette', 'dojo', 'jquery', 'inertia']},
    'PRP-003': {'name': 'WEB BACKEND', 'icon': '⚙️', 'tags': ['nodejs', 'django', 'flask', 'rails', 'laravel', 'symfony', 'express', 'fastapi', 'fiber', 'vapor', 'phoenix', 'spring', 'dropwizard', 'vert.x', 'play1', 'cakephp', 'phalcon', 'lumen', 'slim', 'pyramid', 'wagtail', 'directus', 'refinery', 'umbraco', 'sitecore', 'drupal', 'wordpress', 'plone', 'silverstripe', 'craft', 'magento', 'rest', 'microservice', 'serverless', 'jamstack', 'meteor', 'deno', 'npm', 'gulp', 'eslint']},
    'PRP-004': {'name': 'MOBILE DEVELOPMENT', 'icon': '📱', 'tags': ['android', 'ios', 'flutter', 'react-native', 'ionic', 'cordova', 'capacitor', 'xamarin', 'appium', 'swift-playground']},
    'PRP-005': {'name': 'GAMING & GAME DEV', 'icon': '🎮', 'tags': ['gamedev', 'godot', 'unity', 'libgdx', 'love2d', 'pico-8', 'chip-8', 'flame', 'playcanvas', 'haxe-gamedev', 'gideros', 'game-engine', 'game-dataset', 'game-remake', 'open-source-game', 'games-of-coding', 'game-talk', 'ironsworn', 'minecraft', 'board-game', 'pokemon', 'chess', 'esports', 'pixel-art', 'gbdev', 'dos', 'frc']},
    'PRP-006': {'name': 'AI & MACHINE LEARNING', 'icon': '🤖', 'tags': ['machine-learning', 'deep-learning', 'tensorflow', 'pytorch', 'jax', 'nlp', 'computer-vision', 'chatgpt', 'gpt3', 'generative', 'langchain', 'ai-tool', 'ai-finance', 'ai4lam', 'coreml', 'artificial-intelligence', 'deep-vision', 'xai', 'awesome-ai', 'gemini-cli']},
    'PRP-007': {'name': 'DATA SCIENCE & ANALYTICS', 'icon': '📊', 'tags': ['datascience', 'data-engineering', 'bigdata', 'analytics', 'streaming', 'spark', 'hadoop', 'polars', 'dash', 'jupyter', 'dataviz', 'json', 'csv', 'json-dataset', 'information-retrieval', 'quantified-self', 'quant']},
    'PRP-008': {'name': 'CLOUD & DEVOPS', 'icon': '☁️', 'tags': ['docker', 'kubernetes', 'terraform', 'ansible', 'aws', 'azure', 'gcp', 'cloudflare', 'digitalocean', 'ibmcloud', 'heroku', 'ci', 'cd', 'sre', 'devsecops', 'saltstack', 'vagrant', 'kustomize', 'opentofu', 'cdk', 'k6', 'pulumi', 'container']},
    'PRP-009': {'name': 'DATABASES', 'icon': '🗄️', 'tags': ['postgres', 'mysql', 'mongodb', 'redis', 'neo4j', 'cassandra', 'couchdb', 'rethinkdb', 'influxdb', 'hbase', 'tdengine', 'nosql', 'db-tool', 'sql']},
    'PRP-010': {'name': 'SECURITY & PRIVACY', 'icon': '🔒', 'tags': ['security', 'hacking', 'pentest', 'ctf', 'malware', 'honeypot', 'incident-response', 'crypto', 'cryptography', 'privacy', 'appsec', 'vehicle-security', 'web-security', 'lockpicking', 'osint', 'fuzzing', 'evm-security', 'blueteam', 'gdpr', 'pci-dss']},
    'PRP-011': {'name': 'SYSTEMS & PLATFORMS', 'icon': '🖥️', 'tags': ['linux', 'macos', 'windows', 'bsd', 'dos', 'raspberry-pi', 'wsl', 'nix', 'arch', 'kde', 'gnome', 'qgis', 'qubes', 'amazon-alexa', 'actions-on-google', 'home-assistant', 'smart-tv', 'fuse', 'ros2']},
    'PRP-012': {'name': 'DEVELOPER TOOLS', 'icon': '🛠️', 'tags': ['git', 'vim', 'neovim', 'emacs', 'vscode', 'atom', 'jetbrains', 'sublime', 'devenv', 'devtools', 'shell', 'zsh', 'fish', 'tmux', 'cli-app', 'terminal', 'powershell', 'bash', 'dtrace', 'cmake', 'composer', 'alfred', 'scriptable', 'pinned-gist', 'code-review', 'git-addon', 'git-hook', 'github']},
    'PRP-013': {'name': 'PACKAGE MANAGERS & BUILD', 'icon': '📦', 'tags': ['npm', 'webpack', 'gulp', 'rollup', 'esbuild', 'micro-npm', 'npm-script', 'awesome-lint']},
    'PRP-014': {'name': 'TESTING & QA', 'icon': '🧪', 'tags': ['testing', 'selenium', 'playwright', 'ava', 'tap', 'regression', 'gatling', 'jmeter', 'static-analysis', 'qa']},
    'PRP-015': {'name': 'SOFTWARE ARCHITECTURE', 'icon': '🏗️', 'tags': ['design-pattern', 'ddd', 'software-architecture', 'microservice', 'functional-programming', 'recursion-scheme']},
    'PRP-016': {'name': 'IoT & HARDWARE', 'icon': '🔌', 'tags': ['iot', 'embedded', 'arduino', 'esp', 'circuitpython', 'adafruit', 'micropython', 'raspberry', 'robot', 'lidar', 'open-hardware', 'electronics', 'beacon', 'mqtt', 'fpga']},
    'PRP-017': {'name': 'BLOCKCHAIN & CRYPTO', 'icon': '⛓️', 'tags': ['blockchain', 'bitcoin', 'ethereum', 'solana', 'algorand', 'ripple', 'corda', 'substrate', 'stacks-chain', 'golem', 'eosio', 'waves', 'non-financial-blockchain', 'crypto-paper', 'coin']},
    'PRP-018': {'name': 'SCIENCE & RESEARCH', 'icon': '🧬', 'tags': ['science', 'math', 'physics', 'bioinformatics', 'computational-biology', 'neuroscience', 'cheminformatics', 'bioie', 'parasite', 'agriculture', 'cropsteering', 'scientific-computing', 'scientific-writing', 'research', 'latex', 'tikz']},
    'PRP-019': {'name': 'EDUCATION & LEARNING', 'icon': '📚', 'tags': ['education', 'learn', 'courses', 'tutorial', 'programming-for-kids', 'educational-game', 'computer-science', 'competitive-programming', 'algorithm', 'kata', 'interview', 'roadmap', 'free-programming-book', 'beginner', 'talk', 'tech-video']},
    'PRP-020': {'name': 'DESIGN & UI/UX', 'icon': '🎨', 'tags': ['design', 'ui', 'ux', 'design-system', 'design-principle', 'web-design', 'product-design', 'sketch', 'framer', 'creative-coding', 'canvas', 'webgl', 'vulkan', 'opengl', 'charting', 'd3', 'colorful', 'font', 'icon']},
    'PRP-021': {'name': 'MEDIA & CONTENT', 'icon': '🎬', 'tags': ['video', 'audio', 'music', 'podcast', 'broadcasting', 'ffmpeg', 'vlc', 'webaudio', 'audio-visualization', 'photography', 'gif', 'creative-tech', 'audiovisual', 'pixel-art']},
    'PRP-022': {'name': 'BUSINESS & CAREER', 'icon': '💼', 'tags': ['business', 'startup', 'indie', 'product-management', 'project-management', 'okr', 'leading', 'managing', 'remote-job', 'job-board', 'internship', 'freelance', 'marketing', 'billing', 'amazon-seller', 'social-enterprise', 'open-company', 'speaking', 'developer-first']},
    'PRP-023': {'name': 'COMMUNITY & CULTURE', 'icon': '🌍', 'tags': ['diversity', 'for-girls', 'mental-health', 'accessibility', 'humane-tech', 'earth', 'clean-tech', 'veganism', 'theravada', 'uncopyright', 'ad-free', 'free-software', 'open-source-supporter', 'maintainer', 'patreon', 'naming', 'falsehood', 'answer', 'ama', 'speaker', 'event', 'conference', 'italy-event', 'netherlands-event', 'european-tech']},
    'PRP-024': {'name': 'NETWORKING & COMMS', 'icon': '📡', 'tags': ['network', 'sdn', 'pcap', 'snmp', 'irc', 'mastodon', 'slack', 'discord', 'email', 'rtc', 'connectivity', 'ssh', 'radio', 'hacker-news', 'chatops', 'chat', 'bot']},
    'PRP-025': {'name': 'UTILITIES & PRODUCTIVITY', 'icon': '🔧', 'tags': ['productivity', 'selfhosted', 'sysadmin', 'tool', 'lowcode', 'no-login', 'calculator', 'userscript', 'boilerplate', 'building-block', 'pagespeed', 'readme', 'htaccess', 'stock-resource', 'creative-commons', 'ponyfill', 'promise', 'observable', 'workflow-automation', 'distraction-blocker']},
    'PRP-026': {'name': 'CONTENT MANAGEMENT', 'icon': '📄', 'tags': ['cms', 'markdown', 'text-editing', 'book-authoring', 'blog', 'newsletter', 'rss', 'web-archiving', 'digital-history', 'open-source-document']},
    'PRP-027': {'name': 'HEALTH & WELLNESS', 'icon': '🏥', 'tags': ['health', 'healthcare', 'glp1', 'mental-health', 'biomedical', 'digital-health']},
    # Fallback sector: it has no tags and is returned by classify_list when
    # no other sector matches.
    'PRP-028': {'name': 'MISCELLANEOUS', 'icon': '📦', 'tags': []},
}

def log(msg):
    """Print a timestamped message and append it to the LOG file.

    The log file is opened as UTF-8 explicitly so that messages containing
    non-ASCII text (paths, exception messages) do not depend on the locale
    the script happens to run under. A failure to write the log file is
    reported on stderr instead of being raised: logging is a side channel
    and must not abort the sync itself.

    Args:
        msg: Text to record; it is interpolated into the line with an
            f-string, so any object with a string form is accepted.
    """
    ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    line = f"[{ts}] {msg}"
    print(line)
    try:
        with open(LOG, "a", encoding="utf-8") as f:
            f.write(line + "\n")
    except OSError as exc:
        # Still visible to cron/systemd via stderr even if LOG is unwritable.
        print(f"[{ts}] WARNING: could not write to {LOG}: {exc}", file=sys.stderr)

def clean_name(name):
    """Strip a leading "Awesome"/"awesome" prefix from a list name.

    The prefix and any run of '-', ' ' or '_' directly after it are dropped
    and the remainder is whitespace-trimmed. If nothing would be left, the
    original name is returned unchanged.
    """
    prefix = re.match(r'^[Aa]wesome[- _]*', name)
    remainder = name[prefix.end():] if prefix else name
    remainder = remainder.strip()
    if not remainder:
        return name
    return remainder

def _run_git(*args):
    """Run ``git`` with *args*, logging git's stderr before re-raising on failure."""
    try:
        subprocess.run(['git', *args], check=True, capture_output=True)
    except subprocess.CalledProcessError as exc:
        # The exception's own message only carries the exit status; the
        # captured stderr is where git explains what actually went wrong.
        detail = exc.stderr.decode('utf-8', errors='replace').strip() if exc.stderr else ''
        if detail:
            log(f"git {' '.join(args)} failed: {detail}")
        raise


def pull_repo():
    """Clone REPO_URL into REPO_DIR, or update the checkout already there.

    When REPO_DIR already contains a ``.git`` directory, all remotes are
    fetched and the working tree is hard-reset to ``origin/main`` (local
    changes are discarded). Otherwise REPO_DIR is created if needed and a
    depth-1 clone is made.

    Raises:
        subprocess.CalledProcessError: If any git command exits non-zero.
            Git's stderr output is written to the log before the exception
            propagates.
    """
    if os.path.isdir(os.path.join(REPO_DIR, '.git')):
        log("Pulling latest changes...")
        _run_git('-C', REPO_DIR, 'fetch', '--all')
        _run_git('-C', REPO_DIR, 'reset', '--hard', 'origin/main')
    else:
        log("Cloning repository...")
        os.makedirs(REPO_DIR, exist_ok=True)
        _run_git('clone', '--depth', '1', REPO_URL, REPO_DIR)
    log("Repository updated.")

def parse_readme(filepath):
    """Parse a single awesome list README.md into structured data.

    The file is read as UTF-8 (undecodable bytes are replaced) and scanned
    line by line:

    * the title is the text of the first line starting with ``"# "``;
    * the description is the first non-blank line after a title line that
      does not start with ``#``, ``[`` or ``!``; the search stops at the
      next header;
    * every ``##``..``######`` header opens a new subcategory, and every
      bullet line (``-`` or ``*``) containing a Markdown link becomes an
      entry of the current subcategory, using the first link on the line.

    Bullets under housekeeping headers (contents, license, ...) are ignored,
    so table-of-contents anchors are not reported as entries. Bullets that
    appear before any header are collected under a ``"General"``
    subcategory. Subcategories without entries are dropped.

    Args:
        filepath: Path of the Markdown file to parse.

    Returns:
        ``None`` if the file cannot be read, otherwise a dict with the keys
        ``title`` (passed through ``clean_name``), ``description`` (at most
        300 characters), ``entry_count``, ``subcategory_count`` and
        ``subcategories`` (list of ``{'name', 'parent', 'entries'}`` dicts
        whose entries are ``{'name', 'url', 'description'}`` dicts). The
        keys ``github_url``, ``stars`` and ``name`` are always present and
        are empty strings; ``parent`` is likewise always ``''``.
    """
    try:
        with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
            content = f.read()
    except OSError:
        # Unreadable file: the caller treats None as "skip this list".
        return None

    lines = content.split('\n')

    # Headers that never hold real entries; links below them are skipped.
    ignored_headers = frozenset((
        'contents', 'table of contents', 'toc',
        'license', 'contributing', 'footnotes',
    ))

    # --- Title and description -------------------------------------------
    title = ''
    description = ''
    title_seen = False
    for line in lines:
        if line.startswith('# '):
            if not title_seen:
                title = line[2:].strip()
                title_seen = True
            continue
        if not title_seen:
            continue
        stripped = line.strip()
        if not stripped:
            continue
        if stripped.startswith('#'):
            # Reached the next section without finding a description.
            break
        if stripped.startswith(('[', '!')):
            # Badge / image / bare-link lines are not a description.
            continue
        description = stripped
        break

    # --- Sections and entries ---------------------------------------------
    link_pattern = re.compile(r'\[([^\]]+)\]\(([^)]+)\)')
    header_pattern = re.compile(r'^(#{2,6})\s+(.+)')
    subcategories = []
    current_sub = None
    in_ignored_section = False

    for line in lines:
        stripped = line.strip()

        header_match = header_pattern.match(stripped)
        if header_match:
            # Remove links from the header text.
            header_name = re.sub(r'\s*\[.*?\]\(.*?\)', '', header_match.group(2)).strip()
            # A header that is nothing but a link leaves the current
            # section (and its ignored/active state) untouched.
            if header_name:
                in_ignored_section = header_name.lower() in ignored_headers
                if not in_ignored_section:
                    current_sub = {'name': header_name, 'parent': '', 'entries': []}
                    subcategories.append(current_sub)
            continue

        if in_ignored_section or not stripped.startswith(('-', '*')):
            continue

        link = link_pattern.search(stripped)
        if link is None:
            continue

        # The description is whatever follows the matched link itself (not
        # merely the first ')' on the line, which may belong to the link
        # text), minus one optional separator character.
        desc_match = re.match(r'\s*[-–—:]?\s*(.+)', stripped[link.end():])
        entry_desc = desc_match.group(1).strip() if desc_match else ''

        if current_sub is None:
            current_sub = {'name': 'General', 'parent': '', 'entries': []}
            subcategories.append(current_sub)
        current_sub['entries'].append({
            'name': link.group(1),
            'url': link.group(2),
            'description': entry_desc,
        })

    # Filter empty subcategories
    subcategories = [s for s in subcategories if s['entries']]
    total_entries = sum(len(s['entries']) for s in subcategories)

    return {
        'title': clean_name(title),
        'description': description[:300],
        'github_url': '',
        'stars': '',
        'entry_count': total_entries,
        'subcategory_count': len(subcategories),
        'subcategories': subcategories,
        'name': ''
    }

def _normalize_for_tags(value):
    """Lower-case *value* and turn every non-alphanumeric run into one space.

    The result is padded with a space on both sides so that a whole-token
    test can be written as a plain substring check (``" go " in text``).
    """
    return f" {re.sub(r'[^a-z0-9]+', ' ', value.lower()).strip()} "


def classify_list(slug, title, description):
    """Assign a list to a sector based on slug/title/description matching.

    Tags are matched as whole tokens (or whole token sequences for tags such
    as ``react-native``), not as raw substrings: a one-letter tag like ``v``
    or ``r`` would otherwise match nearly every text and pull everything
    into the first sector, and ``rest`` would match "interest". Punctuation
    is treated as a token separator on both sides, so ``machine-learning``
    also matches "Machine Learning". Tags longer than two characters also
    match with a trailing "s" (``git-hook`` matches "git-hooks").

    Sectors are tried in SECTOR_MAP order and the first hit wins.

    Args:
        slug: List identifier, e.g. ``"org--repo"``.
        title: Parsed list title.
        description: Parsed list description.

    Returns:
        The matching sector code, or ``'PRP-028'`` when no tag matches.
    """
    text = _normalize_for_tags(f"{slug} {title} {description}")

    for code, sector in SECTOR_MAP.items():
        if code == 'PRP-028':  # Misc is fallback
            continue
        for tag in sector['tags']:
            needle = _normalize_for_tags(tag)
            if not needle.strip():
                # A tag with no alphanumeric characters can never be matched.
                continue
            if needle in text:
                return code
            # Plural form; skipped for very short tags, where e.g. 'r' + 's'
            # would wrongly match the "rs" token in "seed-rs".
            if len(tag) > 2 and f"{needle[:-1]}s " in text:
                return code

    return 'PRP-028'  # Miscellaneous fallback

def _write_json_atomic(path, payload):
    """Dump *payload* as JSON to a sibling temp file, then rename it over *path*."""
    tmp_path = f"{path}.tmp"
    with open(tmp_path, 'w', encoding='utf-8') as f:
        json.dump(payload, f)
    # os.replace swaps the file in one step, so a reader sees either the old
    # or the new file, never a half-written one.
    os.replace(tmp_path, path)


def build_data():
    """Rebuild the per-list JSON files and the sector index from the checkout.

    Walks ``REPO_DIR/content/<org>/<repo>/`` looking for ``README.md`` (or
    ``readme.md``), parses each one with ``parse_readme``, classifies it with
    ``classify_list`` and writes ``OUTPUT_DIR/<org>--<repo>.json``. Lists
    that fail to parse or contain no entries are skipped. Finally the sector
    index (non-empty sectors only, lists sorted by entry count descending)
    is written to OUTPUT_INDEX.

    Existing output is replaced in place: new files are written first, and
    only afterwards are ``.json`` files that were not regenerated removed.
    A failure part-way through therefore leaves the previously published
    data available instead of an empty directory. If not a single list could
    be parsed, nothing is removed and the build is reported as failed.

    Returns:
        ``True`` on success; ``False`` if the content directory is missing
        or no list was produced.
    """
    content_dir = os.path.join(REPO_DIR, 'content')
    if not os.path.isdir(content_dir):
        log(f"ERROR: content dir not found: {content_dir}")
        return False

    os.makedirs(OUTPUT_DIR, exist_ok=True)

    sectors = {
        code: {**info, 'code': code, 'lists': [], 'list_count': 0, 'total_entries': 0}
        for code, info in SECTOR_MAP.items()
    }
    total_lists = 0
    total_entries = 0
    written = set()  # file names produced by this run

    # Walk content directory
    for org_dir in sorted(os.listdir(content_dir)):
        org_path = os.path.join(content_dir, org_dir)
        if not os.path.isdir(org_path):
            continue

        for repo_dir in sorted(os.listdir(org_path)):
            repo_path = os.path.join(org_path, repo_dir)
            candidates = (
                os.path.join(repo_path, 'README.md'),
                os.path.join(repo_path, 'readme.md'),
            )
            readme_path = next((p for p in candidates if os.path.isfile(p)), None)
            if readme_path is None:
                continue

            data = parse_readme(readme_path)
            if data is None or data['entry_count'] == 0:
                continue

            slug = f"{org_dir}--{repo_dir}"
            data['slug'] = slug
            sector_code = classify_list(slug, data['title'], data['description'])
            data['tag'] = sector_code

            # Save individual file
            out_name = f"{slug}.json"
            _write_json_atomic(os.path.join(OUTPUT_DIR, out_name), data)
            written.add(out_name)

            # Add to sector index
            sector = sectors[sector_code]
            sector['lists'].append({
                'slug': slug,
                'title': data['title'],
                'description': data['description'][:200],
                'stars': data.get('stars', ''),
                'entry_count': data['entry_count'],
                'subcategory_count': data['subcategory_count']
            })
            sector['list_count'] += 1
            sector['total_entries'] += data['entry_count']

            total_lists += 1
            total_entries += data['entry_count']

    if total_lists == 0:
        # Most likely the upstream layout changed; do not wipe what is live.
        log(f"ERROR: no lists could be parsed from {content_dir}; keeping existing data")
        return False

    # Sort lists within sectors by entry count
    for sector in sectors.values():
        sector['lists'].sort(key=lambda x: x['entry_count'], reverse=True)

    # Build index (remove empty sectors)
    sector_list = [s for s in sectors.values() if s['list_count'] > 0]
    sector_list.sort(key=lambda x: x['code'])

    index = {
        'total_lists': total_lists,
        'total_entries': total_entries,
        'sector_count': len(sector_list),
        'sectors': sector_list
    }
    _write_json_atomic(OUTPUT_INDEX, index)

    # Clear old data: drop files left over from lists that no longer exist.
    # Done last so the new index never points at a file that is missing.
    for name in os.listdir(OUTPUT_DIR):
        if name.endswith('.json') and name not in written:
            os.remove(os.path.join(OUTPUT_DIR, name))

    log(f"Built {total_lists} lists, {total_entries} entries across {len(sector_list)} sectors")
    return True

def restart_api():
    """Restart the ``jaeswift-api`` systemd unit on a best-effort basis.

    Any failure is logged as a warning and never raised, so a failed restart
    does not fail the sync. When ``systemctl`` itself exits non-zero, its
    captured stderr is appended to the warning, because the exception
    message alone only contains the exit status.
    """
    log("Restarting API service...")
    try:
        subprocess.run(['systemctl', 'restart', 'jaeswift-api'], check=True, capture_output=True)
        log("API restarted.")
    except subprocess.CalledProcessError as e:
        detail = e.stderr.decode('utf-8', errors='replace').strip() if e.stderr else ''
        suffix = f" ({detail})" if detail else ''
        log(f"WARNING: Could not restart API: {e}{suffix}")
    except Exception as e:
        # e.g. systemctl is not installed; still only a warning.
        log(f"WARNING: Could not restart API: {e}")

def main():
    """Run one full sync: update the checkout, rebuild the data, restart the API.

    Exits with status 1 both when a step raises and when ``build_data``
    reports failure, so that a scheduler (cron, systemd timer) can tell a
    failed run from a successful one. The API is only restarted after a
    successful build.
    """
    log("="*60)
    log("AWESOMELIST SYNC STARTED")
    try:
        pull_repo()
        if not build_data():
            log("SYNC FAILED - build error")
            # SystemExit is not an Exception subclass, so it is not caught
            # by the handler below.
            sys.exit(1)
        restart_api()
        log("SYNC COMPLETED SUCCESSFULLY")
    except Exception as e:
        log(f"SYNC FAILED: {e}")
        sys.exit(1)

if __name__ == '__main__':
    main()