#!/usr/bin/env python3
"""CONTRABAND Auto-Sync — Pulls latest source data and rebuilds contraband.json"""
import json
import os
import re
import subprocess
import sys
from datetime import datetime

REPO_URL = "https://github.com/fmhy/edit.git"
REPO_DIR = "/opt/contraband-source"
OUTPUT = "/var/www/jaeswift-homepage/api/data/contraband.json"
LOG = "/var/log/contraband-sync.log"


def log(msg):
    """Print *msg* with a timestamp and append the same line to LOG.

    File logging is best-effort: if LOG cannot be written the problem is
    reported on stderr and the sync carries on.
    """
    ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    line = f"[{ts}] {msg}"
    print(line)
    try:
        # Explicit UTF-8 so non-ASCII text (git output, names) never trips
        # over the process locale.
        with open(LOG, "a", encoding="utf-8") as f:
            f.write(line + "\n")
    except OSError as exc:
        print(f"[{ts}] WARNING: cannot write to {LOG}: {exc}", file=sys.stderr)


# Display metadata for each known source file, keyed by the file's base name.
CATEGORY_MAP = {
    'ai': {'code': 'CRT-001', 'name': 'AI TOOLS', 'icon': '🤖'},
    'video': {'code': 'CRT-002', 'name': 'STREAMING & VIDEO', 'icon': '📡'},
    'audio': {'code': 'CRT-003', 'name': 'AUDIO & MUSIC', 'icon': '🎧'},
    'gaming': {'code': 'CRT-004', 'name': 'GAMING', 'icon': '🎮'},
    'reading': {'code': 'CRT-005', 'name': 'READING & BOOKS', 'icon': '📚'},
    'torrenting': {'code': 'CRT-006', 'name': 'TORRENTING', 'icon': '🔻'},
    'downloading': {'code': 'CRT-007', 'name': 'DOWNLOADING', 'icon': '⬇️'},
    'educational': {'code': 'CRT-008', 'name': 'EDUCATIONAL', 'icon': '🎓'},
    'dev-tools': {'code': 'CRT-009', 'name': 'DEV TOOLS', 'icon': '⚙️'},
    'gaming-tools': {'code': 'CRT-010', 'name': 'GAMING TOOLS', 'icon': '🕹️'},
    'image-tools': {'code': 'CRT-011', 'name': 'IMAGE TOOLS', 'icon': '🖼️'},
    'video-tools': {'code': 'CRT-012', 'name': 'VIDEO TOOLS', 'icon': '🎬'},
    'internet-tools': {'code': 'CRT-013', 'name': 'INTERNET TOOLS', 'icon': '🌐'},
    'social-media-tools': {'code': 'CRT-014', 'name': 'SOCIAL MEDIA', 'icon': '📱'},
    'text-tools': {'code': 'CRT-015', 'name': 'TEXT TOOLS', 'icon': '📝'},
    'file-tools': {'code': 'CRT-016', 'name': 'FILE TOOLS', 'icon': '📁'},
    'system-tools': {'code': 'CRT-017', 'name': 'SYSTEM TOOLS', 'icon': '💻'},
    'storage': {'code': 'CRT-018', 'name': 'STORAGE & CLOUD', 'icon': '💾'},
    'privacy': {'code': 'CRT-019', 'name': 'PRIVACY & SECURITY', 'icon': '🔒'},
    'linux-macos': {'code': 'CRT-020', 'name': 'LINUX & MACOS', 'icon': '🐧'},
    'mobile': {'code': 'CRT-021', 'name': 'MOBILE', 'icon': '📲'},
    'misc': {'code': 'CRT-022', 'name': 'MISCELLANEOUS', 'icon': '📦'},
    'non-english': {'code': 'CRT-023', 'name': 'NON-ENGLISH', 'icon': '🌍'},
    'unsafe': {'code': 'CRT-024', 'name': 'UNSAFE SITES', 'icon': '⚠️'},
}

# Hyphen-insensitive lookup: "devtools" and "dev-tools" both resolve to the
# canonical CATEGORY_MAP key "dev-tools".
_CANONICAL_KEYS = {k.replace('-', ''): k for k in CATEGORY_MAP}

# Markdown files in docs/ that are site pages, not link categories.
_SKIP_FILES = frozenset(
    ('index', 'startpage', 'sandbox', 'posts', 'beginners-guide', 'feedback')
)

# Patterns are compiled once; they run for every line of every source file.
_BULLET_RE = re.compile(r'^[\-\*]\s')
_BULLET_PREFIX_RE = re.compile(r'^[\-\*]\s*')
_LINK_RE = re.compile(r'\[([^\]]+)\]\(([^)]+)\)')
# Bold/italic markers that directly wrap a link: **[name](url)**
_EMPHASIS_AROUND_LINK_RE = re.compile(r'\*{1,3}(?=\[)|(?<=\))\*{1,3}')
_LEADING_SEP_RE = re.compile(r'^[\s\-–—/,]+')
_TRAILING_SEP_RE = re.compile(r'[\s\-–—/,]+$')
_HEADER_RE = re.compile(r'^(#{1,3})\s*[►▷]?\s*(.+)')
_HEADER_MARKER_RE = re.compile(r'[►▷]')
_NOTE_PREFIX_RE = re.compile(r'^[!:]+\s*(note|warning|tip|info)?\s*', re.IGNORECASE)


def parse_entry(line):
    """Parse a markdown bullet line into an entry dict.

    Returns a dict with keys ``name``, ``url``, ``description``, ``starred``
    and ``extra_links`` (a list of ``{'name', 'url'}`` dicts), or ``None``
    when the line is empty, is a header, or yields neither a name nor a URL.

    The first ``[name](url)`` at the start of the line is the main link; any
    further links become ``extra_links`` and the remaining text, trimmed of
    separator punctuation, becomes the description. A line without a leading
    link is kept as a name-only entry.
    """
    line = line.strip()
    if not line or line.startswith('#'):
        return None

    # Remove leading bullet
    line = _BULLET_PREFIX_RE.sub('', line)
    if not line:
        return None

    starred = line.startswith('⭐')
    if starred:
        # The star may carry an emoji variation selector (U+FE0F).
        line = line[1:].lstrip('\ufe0f').strip()

    # Highlighted entries are written as **[name](url)**; without dropping
    # the emphasis markers the link pattern below would not match and the
    # whole markdown line would end up as the entry name.
    line = _EMPHASIS_AROUND_LINK_RE.sub('', line)

    entry = {'name': '', 'url': '', 'description': '', 'starred': starred, 'extra_links': []}

    # Extract main link: [name](url)
    main_match = _LINK_RE.match(line)
    if main_match:
        entry['name'] = main_match.group(1).strip()
        entry['url'] = main_match.group(2).strip()
        rest = line[main_match.end():].strip()
    else:
        # No link, just text
        entry['name'] = line
        rest = ''

    if rest:
        # Remove leading separators
        rest = _LEADING_SEP_RE.sub('', rest).strip()

        # Extract extra links
        entry['extra_links'] = [
            {'name': ename.strip(), 'url': eurl.strip()}
            for ename, eurl in _LINK_RE.findall(rest)
        ]

        # Description is the non-link text
        desc = _LINK_RE.sub('', rest).strip()
        desc = _LEADING_SEP_RE.sub('', desc).strip()
        desc = _TRAILING_SEP_RE.sub('', desc).strip()
        entry['description'] = desc

    if not entry['name'] and not entry['url']:
        return None

    return entry


def parse_markdown_file(filepath, cat_key):
    """Parse a single markdown file into structured category data.

    ``cat_key`` selects the display metadata from CATEGORY_MAP; unknown keys
    get a generated code/name and a generic icon. The returned dict holds
    ``code``, ``name``, ``icon``, ``slug``, the entry/starred/subcategory
    counts and the list of ``subcategories`` (each with ``name``,
    ``entries`` and ``notes``).

    Raises whatever ``open`` raises if the file cannot be read or decoded.
    """
    cat_info = CATEGORY_MAP.get(
        cat_key,
        {'code': f'CRT-{cat_key.upper()}', 'name': cat_key.upper(), 'icon': '📄'},
    )

    subcategories = []
    current_sub = None

    with open(filepath, 'r', encoding='utf-8') as f:
        for line in f:
            line_stripped = line.strip()

            # Skip frontmatter
            if line_stripped == '---':
                continue
            if line_stripped.startswith(('title:', 'description:')):
                continue

            # Subcategory headers: # ► or ## or ## ▷
            header_match = _HEADER_RE.match(line_stripped)
            if header_match:
                name = _HEADER_MARKER_RE.sub('', header_match.group(2).strip()).strip()
                # "Note ..." headings and one-character names are not
                # treated as subcategories.
                if name and not name.lower().startswith('note') and len(name) > 1:
                    current_sub = {'name': name, 'entries': [], 'notes': []}
                    subcategories.append(current_sub)
                continue

            # Note lines
            if line_stripped.startswith(('!!!', ':::')):
                note_text = _NOTE_PREFIX_RE.sub('', line_stripped).strip()
                # Notes that appear before the first subcategory are dropped.
                if note_text and current_sub:
                    current_sub['notes'].append(note_text)
                continue

            # Bullet entries
            if _BULLET_RE.match(line_stripped):
                if current_sub is None:
                    # Bullets before any header go into a catch-all group.
                    current_sub = {'name': 'General', 'entries': [], 'notes': []}
                    subcategories.append(current_sub)

                entry = parse_entry(line_stripped)
                if entry:
                    current_sub['entries'].append(entry)

    # Build category
    total_entries = sum(len(s['entries']) for s in subcategories)
    starred_count = sum(1 for s in subcategories for e in s['entries'] if e['starred'])

    return {
        'code': cat_info['code'],
        'name': cat_info['name'],
        'icon': cat_info['icon'],
        'slug': cat_key,
        'entry_count': total_entries,
        'starred_count': starred_count,
        'subcategory_count': len(subcategories),
        'subcategories': subcategories
    }


def _update_repo():
    """Clone REPO_URL into REPO_DIR, or fast-forward an existing checkout.

    A failed pull is logged and the existing checkout is reused; a failed
    initial clone leaves nothing to parse, so the process exits with 1.
    """
    if os.path.exists(os.path.join(REPO_DIR, '.git')):
        log("Pulling latest...")
        try:
            result = subprocess.run(
                ['git', '-C', REPO_DIR, 'pull', '--ff-only'],
                capture_output=True, text=True,
            )
        except OSError as exc:
            # e.g. git is not installed or not on PATH
            log(f"WARNING: could not run git pull: {exc}")
            return
        if result.returncode != 0:
            log(f"WARNING: git pull failed (exit {result.returncode}): {result.stderr.strip()}")
            log("Rebuilding from the existing checkout.")
            return
        log(f"Git pull: {result.stdout.strip()}")
        if 'Already up to date' in result.stdout:
            log("No changes detected. Rebuilding anyway.")
    else:
        log("Cloning repo...")
        try:
            subprocess.run(['git', 'clone', '--depth', '1', REPO_URL, REPO_DIR], check=True)
        except (subprocess.CalledProcessError, OSError) as exc:
            log(f"ERROR: git clone failed: {exc}")
            sys.exit(1)
        log("Clone complete.")


def _discover_category_files(docs_dir):
    """Return a ``{filename: category key}`` map for the markdown files in *docs_dir*.

    File names are matched against CATEGORY_MAP ignoring case and hyphens;
    files with no matching category keep their own lower-cased base name.
    """
    file_map = {}
    for fname in os.listdir(docs_dir):
        if not fname.endswith('.md'):
            continue
        key = fname[:-len('.md')].lower()
        # Skip non-category files
        if key in _SKIP_FILES:
            continue
        file_map[fname] = _CANONICAL_KEYS.get(key.replace('-', ''), key)
    return file_map


def _write_output(output):
    """Serialise *output* to OUTPUT without exposing a half-written file."""
    os.makedirs(os.path.dirname(OUTPUT), exist_ok=True)
    tmp_path = f"{OUTPUT}.tmp"
    with open(tmp_path, 'w', encoding='utf-8') as f:
        json.dump(output, f, ensure_ascii=False)
    if os.path.exists(OUTPUT):
        # Keep the permissions of the file being replaced.
        os.chmod(tmp_path, os.stat(OUTPUT).st_mode & 0o7777)
    # Swap the finished file in so readers see either the old or the new
    # version, never a truncated one.
    os.replace(tmp_path, OUTPUT)


def _restart_api():
    """Restart the jaeswift-api service and log the outcome."""
    log("Restarting jaeswift-api...")
    try:
        result = subprocess.run(
            ['systemctl', 'restart', 'jaeswift-api'],
            capture_output=True, text=True,
        )
    except OSError as exc:
        # e.g. systemctl is not available on this host
        log(f"API restart failed: {exc}")
        return
    if result.returncode == 0:
        log("API restarted successfully.")
    else:
        log(f"API restart failed: {result.stderr}")


def sync():
    """Refresh the source checkout, rebuild contraband.json and restart the API.

    Exits the process with status 1 when the repository cannot be cloned or
    its ``docs`` directory is missing. A file that fails to parse is logged
    and skipped so one bad file does not block the rest.
    """
    log("Starting sync...")

    # Clone or pull
    _update_repo()

    docs_dir = os.path.join(REPO_DIR, 'docs')
    if not os.path.exists(docs_dir):
        log(f"ERROR: docs dir not found at {docs_dir}")
        sys.exit(1)

    # Map filenames to category keys
    file_map = _discover_category_files(docs_dir)
    log(f"Found {len(file_map)} category files to parse")

    categories = []
    total_entries = 0
    total_starred = 0

    for fname, cat_key in sorted(file_map.items()):
        filepath = os.path.join(docs_dir, fname)
        try:
            cat = parse_markdown_file(filepath, cat_key)
        except Exception as e:  # deliberate best-effort: log and move on
            log(f"  ERROR parsing {fname}: {e}")
            continue
        categories.append(cat)
        total_entries += cat['entry_count']
        total_starred += cat['starred_count']
        log(f"  {cat['code']} {cat['name']}: {cat['entry_count']} entries ({cat['starred_count']} starred)")

    # Sort by code
    categories.sort(key=lambda c: c['code'])

    # Build output
    output = {
        'source': 'curated',
        'last_updated': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'total_entries': total_entries,
        'total_starred': total_starred,
        'total_categories': len(categories),
        'categories': categories
    }

    # Write
    _write_output(output)

    size_mb = os.path.getsize(OUTPUT) / (1024 * 1024)
    log(f"Output: {OUTPUT} ({size_mb:.1f} MB)")
    log(f"Total: {total_entries} entries, {total_starred} starred, {len(categories)} categories")

    # Restart API
    _restart_api()

    log("Sync complete.")


if __name__ == '__main__':
    sync()