#!/usr/bin/env python3
"""AWESOMELIST Auto-Sync — Pulls latest trackawesomelist data and rebuilds JSON"""

import functools
import json
import os
import re
import subprocess
import sys
from datetime import datetime
from pathlib import Path

REPO_URL = "https://github.com/trackawesomelist/trackawesomelist.git"
REPO_DIR = "/opt/awesomelist-source"
OUTPUT_DIR = "/var/www/jaeswift-homepage/api/data/awesomelist"
OUTPUT_INDEX = "/var/www/jaeswift-homepage/api/data/awesomelist_index.json"
LOG = "/var/log/awesomelist-sync.log"

# Sector code -> display name, icon and the tags used by classify_list().
# Sectors are tried in definition order and the first matching tag wins, so a
# tag that appears in several sectors only ever selects the earliest one.
# Icons are written as escapes so they survive editors/terminals that are not
# UTF-8 clean.
# NOTE(review): the icons for PRP-002 and PRP-023 were reconstructed from a
# damaged encoding and are best guesses (globe glyphs) -- confirm.
SECTOR_MAP = {
    'PRP-001': {
        'name': 'PROGRAMMING LANGUAGES',
        'icon': '\U0001F4BB',  # laptop
        'tags': [
            'python', 'go', 'rust', 'javascript', 'typescript', 'ruby',
            'java', 'kotlin', 'swift', 'dart', 'elixir', 'erlang', 'haskell',
            'lua', 'perl', 'php', 'scala', 'clojure', 'crystal', 'nim', 'zig',
            'v', 'ocaml', 'r', 'julia', 'fortran', 'pascal', 'ada', 'c',
            'cpp', 'csharp', 'fsharp', 'groovy', 'elm', 'purescript', 'idris',
            'coq', 'vala', 'actionscript', 'autohotkey', 'autoit',
            'commonlisp', 'clojurescript', 'qsharp', 'd', 'eta', 'frege',
        ],
    },
    'PRP-002': {
        'name': 'WEB FRONTEND',
        'icon': '\U0001F310',  # globe with meridians
        'tags': [
            'react', 'vue', 'angular', 'svelte', 'css', 'html', 'tailwind',
            'bootstrap', 'sass', 'less', 'webpack', 'vite', 'nextjs', 'nuxt',
            'gatsby', 'preact', 'ember', 'backbone', 'knockout', 'cyclejs',
            'choo', 'mithril', 'polymer', 'lit', 'storybook', 'draft-js',
            'redux', 'relay', 'graphql', 'webcomponent', 'ant-design',
            'material-ui', 'flexbox', 'web-animation', 'motion-ui',
            'progressive-web', 'service-worker', 'web-extension',
            'browserify', 'yew', 'seed-rs', 'aurelia', 'marionette', 'dojo',
            'jquery', 'inertia',
        ],
    },
    'PRP-003': {
        'name': 'WEB BACKEND',
        'icon': '\u2699\ufe0f',  # gear
        'tags': [
            'nodejs', 'django', 'flask', 'rails', 'laravel', 'symfony',
            'express', 'fastapi', 'fiber', 'vapor', 'phoenix', 'spring',
            'dropwizard', 'vert.x', 'play1', 'cakephp', 'phalcon', 'lumen',
            'slim', 'pyramid', 'wagtail', 'directus', 'refinery', 'umbraco',
            'sitecore', 'drupal', 'wordpress', 'plone', 'silverstripe',
            'craft', 'magento', 'rest', 'microservice', 'serverless',
            'jamstack', 'meteor', 'deno', 'npm', 'gulp', 'eslint',
        ],
    },
    'PRP-004': {
        'name': 'MOBILE DEVELOPMENT',
        'icon': '\U0001F4F1',  # mobile phone
        'tags': [
            'android', 'ios', 'flutter', 'react-native', 'ionic', 'cordova',
            'capacitor', 'xamarin', 'appium', 'swift-playground',
        ],
    },
    'PRP-005': {
        'name': 'GAMING & GAME DEV',
        'icon': '\U0001F3AE',  # video game
        'tags': [
            'gamedev', 'godot', 'unity', 'libgdx', 'love2d', 'pico-8',
            'chip-8', 'flame', 'playcanvas', 'haxe-gamedev', 'gideros',
            'game-engine', 'game-dataset', 'game-remake', 'open-source-game',
            'games-of-coding', 'game-talk', 'ironsworn', 'minecraft',
            'board-game', 'pokemon', 'chess', 'esports', 'pixel-art', 'gbdev',
            'dos', 'frc',
        ],
    },
    'PRP-006': {
        'name': 'AI & MACHINE LEARNING',
        'icon': '\U0001F916',  # robot
        'tags': [
            'machine-learning', 'deep-learning', 'tensorflow', 'pytorch',
            'jax', 'nlp', 'computer-vision', 'chatgpt', 'gpt3', 'generative',
            'langchain', 'ai-tool', 'ai-finance', 'ai4lam', 'coreml',
            'artificial-intelligence', 'deep-vision', 'xai', 'awesome-ai',
            'gemini-cli',
        ],
    },
    'PRP-007': {
        'name': 'DATA SCIENCE & ANALYTICS',
        'icon': '\U0001F4CA',  # bar chart
        'tags': [
            'datascience', 'data-engineering', 'bigdata', 'analytics',
            'streaming', 'spark', 'hadoop', 'polars', 'dash', 'jupyter',
            'dataviz', 'json', 'csv', 'json-dataset', 'information-retrieval',
            'quantified-self', 'quant',
        ],
    },
    'PRP-008': {
        'name': 'CLOUD & DEVOPS',
        'icon': '\u2601\ufe0f',  # cloud
        'tags': [
            'docker', 'kubernetes', 'terraform', 'ansible', 'aws', 'azure',
            'gcp', 'cloudflare', 'digitalocean', 'ibmcloud', 'heroku', 'ci',
            'cd', 'sre', 'devsecops', 'saltstack', 'vagrant', 'kustomize',
            'opentofu', 'cdk', 'k6', 'pulumi', 'container',
        ],
    },
    'PRP-009': {
        'name': 'DATABASES',
        'icon': '\U0001F5C4\ufe0f',  # file cabinet
        'tags': [
            'postgres', 'mysql', 'mongodb', 'redis', 'neo4j', 'cassandra',
            'couchdb', 'rethinkdb', 'influxdb', 'hbase', 'tdengine', 'nosql',
            'db-tool', 'sql',
        ],
    },
    'PRP-010': {
        'name': 'SECURITY & PRIVACY',
        'icon': '\U0001F512',  # lock
        'tags': [
            'security', 'hacking', 'pentest', 'ctf', 'malware', 'honeypot',
            'incident-response', 'crypto', 'cryptography', 'privacy',
            'appsec', 'vehicle-security', 'web-security', 'lockpicking',
            'osint', 'fuzzing', 'evm-security', 'blueteam', 'gdpr', 'pci-dss',
        ],
    },
    'PRP-011': {
        'name': 'SYSTEMS & PLATFORMS',
        'icon': '\U0001F5A5\ufe0f',  # desktop computer
        'tags': [
            'linux', 'macos', 'windows', 'bsd', 'dos', 'raspberry-pi', 'wsl',
            'nix', 'arch', 'kde', 'gnome', 'qgis', 'qubes', 'amazon-alexa',
            'actions-on-google', 'home-assistant', 'smart-tv', 'fuse', 'ros2',
        ],
    },
    'PRP-012': {
        'name': 'DEVELOPER TOOLS',
        'icon': '\U0001F6E0\ufe0f',  # hammer and wrench
        'tags': [
            'git', 'vim', 'neovim', 'emacs', 'vscode', 'atom', 'jetbrains',
            'sublime', 'devenv', 'devtools', 'shell', 'zsh', 'fish', 'tmux',
            'cli-app', 'terminal', 'powershell', 'bash', 'dtrace', 'cmake',
            'composer', 'alfred', 'scriptable', 'pinned-gist', 'code-review',
            'git-addon', 'git-hook', 'github',
        ],
    },
    'PRP-013': {
        'name': 'PACKAGE MANAGERS & BUILD',
        'icon': '\U0001F4E6',  # package
        'tags': [
            'npm', 'webpack', 'gulp', 'rollup', 'esbuild', 'micro-npm',
            'npm-script', 'awesome-lint',
        ],
    },
    'PRP-014': {
        'name': 'TESTING & QA',
        'icon': '\U0001F9EA',  # test tube
        'tags': [
            'testing', 'selenium', 'playwright', 'ava', 'tap', 'regression',
            'gatling', 'jmeter', 'static-analysis', 'qa',
        ],
    },
    'PRP-015': {
        'name': 'SOFTWARE ARCHITECTURE',
        'icon': '\U0001F3D7\ufe0f',  # building construction
        'tags': [
            'design-pattern', 'ddd', 'software-architecture', 'microservice',
            'functional-programming', 'recursion-scheme',
        ],
    },
    'PRP-016': {
        'name': 'IoT & HARDWARE',
        'icon': '\U0001F50C',  # electric plug
        'tags': [
            'iot', 'embedded', 'arduino', 'esp', 'circuitpython', 'adafruit',
            'micropython', 'raspberry', 'robot', 'lidar', 'open-hardware',
            'electronics', 'beacon', 'mqtt', 'fpga',
        ],
    },
    'PRP-017': {
        'name': 'BLOCKCHAIN & CRYPTO',
        'icon': '\u26d3\ufe0f',  # chains
        'tags': [
            'blockchain', 'bitcoin', 'ethereum', 'solana', 'algorand',
            'ripple', 'corda', 'substrate', 'stacks-chain', 'golem', 'eosio',
            'waves', 'non-financial-blockchain', 'crypto-paper', 'coin',
        ],
    },
    'PRP-018': {
        'name': 'SCIENCE & RESEARCH',
        'icon': '\U0001F9EC',  # DNA
        'tags': [
            'science', 'math', 'physics', 'bioinformatics',
            'computational-biology', 'neuroscience', 'cheminformatics',
            'bioie', 'parasite', 'agriculture', 'cropsteering',
            'scientific-computing', 'scientific-writing', 'research', 'latex',
            'tikz',
        ],
    },
    'PRP-019': {
        'name': 'EDUCATION & LEARNING',
        'icon': '\U0001F4DA',  # books
        'tags': [
            'education', 'learn', 'courses', 'tutorial',
            'programming-for-kids', 'educational-game', 'computer-science',
            'competitive-programming', 'algorithm', 'kata', 'interview',
            'roadmap', 'free-programming-book', 'beginner', 'talk',
            'tech-video',
        ],
    },
    'PRP-020': {
        'name': 'DESIGN & UI/UX',
        'icon': '\U0001F3A8',  # artist palette
        'tags': [
            'design', 'ui', 'ux', 'design-system', 'design-principle',
            'web-design', 'product-design', 'sketch', 'framer',
            'creative-coding', 'canvas', 'webgl', 'vulkan', 'opengl',
            'charting', 'd3', 'colorful', 'font', 'icon',
        ],
    },
    'PRP-021': {
        'name': 'MEDIA & CONTENT',
        'icon': '\U0001F3AC',  # clapper board
        'tags': [
            'video', 'audio', 'music', 'podcast', 'broadcasting', 'ffmpeg',
            'vlc', 'webaudio', 'audio-visualization', 'photography', 'gif',
            'creative-tech', 'audiovisual', 'pixel-art',
        ],
    },
    'PRP-022': {
        'name': 'BUSINESS & CAREER',
        'icon': '\U0001F4BC',  # briefcase
        'tags': [
            'business', 'startup', 'indie', 'product-management',
            'project-management', 'okr', 'leading', 'managing', 'remote-job',
            'job-board', 'internship', 'freelance', 'marketing', 'billing',
            'amazon-seller', 'social-enterprise', 'open-company', 'speaking',
            'developer-first',
        ],
    },
    'PRP-023': {
        'name': 'COMMUNITY & CULTURE',
        'icon': '\U0001F30D',  # globe showing Europe-Africa
        'tags': [
            'diversity', 'for-girls', 'mental-health', 'accessibility',
            'humane-tech', 'earth', 'clean-tech', 'veganism', 'theravada',
            'uncopyright', 'ad-free', 'free-software',
            'open-source-supporter', 'maintainer', 'patreon', 'naming',
            'falsehood', 'answer', 'ama', 'speaker', 'event', 'conference',
            'italy-event', 'netherlands-event', 'european-tech',
        ],
    },
    'PRP-024': {
        'name': 'NETWORKING & COMMS',
        'icon': '\U0001F4E1',  # satellite antenna
        'tags': [
            'network', 'sdn', 'pcap', 'snmp', 'irc', 'mastodon', 'slack',
            'discord', 'email', 'rtc', 'connectivity', 'ssh', 'radio',
            'hacker-news', 'chatops', 'chat', 'bot',
        ],
    },
    'PRP-025': {
        'name': 'UTILITIES & PRODUCTIVITY',
        'icon': '\U0001F527',  # wrench
        'tags': [
            'productivity', 'selfhosted', 'sysadmin', 'tool', 'lowcode',
            'no-login', 'calculator', 'userscript', 'boilerplate',
            'building-block', 'pagespeed', 'readme', 'htaccess',
            'stock-resource', 'creative-commons', 'ponyfill', 'promise',
            'observable', 'workflow-automation', 'distraction-blocker',
        ],
    },
    'PRP-026': {
        'name': 'CONTENT MANAGEMENT',
        'icon': '\U0001F4C4',  # page facing up
        'tags': [
            'cms', 'markdown', 'text-editing', 'book-authoring', 'blog',
            'newsletter', 'rss', 'web-archiving', 'digital-history',
            'open-source-document',
        ],
    },
    'PRP-027': {
        'name': 'HEALTH & WELLNESS',
        'icon': '\U0001F3E5',  # hospital
        'tags': [
            'health', 'healthcare', 'glp1', 'mental-health', 'biomedical',
            'digital-health',
        ],
    },
    'PRP-028': {
        'name': 'MISCELLANEOUS',
        'icon': '\U0001F4E6',  # package
        'tags': [],
    },
}

# Sector used when no tag matches; classify_list() never scans its tags.
_FALLBACK_SECTOR = 'PRP-028'

# Headers whose sections hold navigation/boilerplate rather than list entries.
_SKIPPED_HEADERS = frozenset({
    'contents', 'table of contents', 'toc', 'license', 'contributing',
    'footnotes',
})

_LINK_RE = re.compile(r'\[([^\]]+)\]\(([^)]+)\)')
_HEADER_RE = re.compile(r'^(#{2,6})\s+(.+)')
_HEADER_LINK_RE = re.compile(r'\s*\[.*?\]\(.*?\)')
# Optional separator (hyphen, en dash, em dash or colon) between a link and
# its description, followed by the description itself.
_DESC_RE = re.compile(r'\s*[-\u2013\u2014:]?\s*(.*)')


def log(msg):
    """Print *msg* with a timestamp and append the same line to LOG."""
    ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    line = f"[{ts}] {msg}"
    print(line)
    with open(LOG, "a", encoding="utf-8") as f:
        f.write(line + "\n")


def clean_name(name):
    """Strip a leading "Awesome"/"awesome" prefix (and separators) from *name*.

    Returns the original *name* unchanged when stripping would leave nothing.
    """
    cleaned = re.sub(r'^[Aa]wesome[- _]*', '', name).strip()
    return cleaned if cleaned else name


def pull_repo():
    """Bring REPO_DIR up to date with origin/main, cloning it if missing.

    Raises:
        subprocess.CalledProcessError: if any git command exits non-zero.
    """
    if os.path.isdir(os.path.join(REPO_DIR, '.git')):
        log("Pulling latest changes...")
        subprocess.run(['git', '-C', REPO_DIR, 'fetch', '--all'],
                       check=True, capture_output=True)
        # Hard reset: the checkout is a read-only mirror, so local changes
        # are discarded rather than merged.
        subprocess.run(['git', '-C', REPO_DIR, 'reset', '--hard', 'origin/main'],
                       check=True, capture_output=True)
    else:
        log("Cloning repository...")
        os.makedirs(REPO_DIR, exist_ok=True)
        subprocess.run(['git', 'clone', '--depth', '1', REPO_URL, REPO_DIR],
                       check=True, capture_output=True)
    log("Repository updated.")


def _extract_title_and_description(lines):
    """Return (title, description) taken from the top of a README.

    The title is the text of the first ``# `` heading. The description is the
    first non-empty line after it that is not a heading, link or image; the
    search stops at the first deeper heading.
    """
    title = ''
    description = ''
    seen_title = False
    for line in lines:
        if line.startswith('# '):
            if not seen_title:
                title = line[2:].strip()
                seen_title = True
            continue
        if not seen_title:
            continue
        stripped = line.strip()
        if not stripped:
            continue
        if stripped.startswith('#'):
            break
        if not stripped.startswith(('[', '!')):
            description = stripped
            break
    return title, description


def _extract_subcategories(lines):
    """Group linked list items under their nearest preceding header.

    Items that appear before any usable header go into a "General" section.
    Items under a header listed in _SKIPPED_HEADERS are dropped so that
    table-of-contents and licence links do not show up as entries.
    """
    subcategories = []
    current_sub = None
    skipping = False
    for line in lines:
        stripped = line.strip()

        header_match = _HEADER_RE.match(stripped)
        if header_match:
            # Remove links from the header text.
            header_name = _HEADER_LINK_RE.sub(
                '', header_match.group(2).strip()).strip()
            skipping = header_name.lower() in _SKIPPED_HEADERS
            # A header that was nothing but links leaves the current section
            # in place, so following items still attach to it.
            if header_name and not skipping:
                current_sub = {'name': header_name, 'parent': '', 'entries': []}
                subcategories.append(current_sub)
            continue

        if skipping:
            continue

        if not (stripped.startswith(('-', '*'))
                and '[' in stripped and '](' in stripped):
            continue
        link = _LINK_RE.search(stripped)
        if link is None:
            continue

        # Take the description from the end of the matched link, not from the
        # first ")" on the line, so link text such as "[Foo (bar)](url)" works.
        entry_desc = _DESC_RE.match(stripped[link.end():]).group(1).strip()
        entry = {
            'name': link.group(1),
            'url': link.group(2),
            'description': entry_desc,
        }
        if current_sub is None:
            current_sub = {'name': 'General', 'parent': '', 'entries': []}
            subcategories.append(current_sub)
        current_sub['entries'].append(entry)

    # Filter empty subcategories
    return [s for s in subcategories if s['entries']]


def parse_readme(filepath):
    """Parse a single awesome list README.md into structured data.

    Returns a dict with the cleaned title, a description capped at 300
    characters, the subcategories with their entries, and entry/subcategory
    counts. ``github_url``, ``stars`` and ``name`` are always empty strings
    here. Returns None when the file cannot be read.
    """
    try:
        with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
            content = f.read()
    except OSError as e:
        log(f"WARNING: Could not read {filepath}: {e}")
        return None

    lines = content.split('\n')
    title, description = _extract_title_and_description(lines)
    subcategories = _extract_subcategories(lines)
    total_entries = sum(len(s['entries']) for s in subcategories)

    return {
        'title': clean_name(title),
        'description': description[:300],
        'github_url': '',
        'stars': '',
        'entry_count': total_entries,
        'subcategory_count': len(subcategories),
        'subcategories': subcategories,
        'name': ''
    }


@functools.lru_cache(maxsize=None)
def _tag_regex(tag):
    """Compile a pattern matching *tag* as a whole term in lowercase text.

    The tag must not touch another letter or digit on either side. Tags longer
    than two characters may carry a plural "s" ("cli-app" matches "cli-apps");
    shorter ones may not, so that "v" does not match the word "vs".
    """
    plural = 's?' if len(tag) > 2 else ''
    return re.compile(rf'(?<![a-z0-9]){re.escape(tag)}{plural}(?![a-z0-9])')


def classify_list(slug, title, description):
    """Assign a list to a sector based on slug/title/description matching.

    Sectors are tried in SECTOR_MAP order and the first tag found as a whole
    term wins. Plain substring matching is deliberately avoided: with tags
    such as 'r', 'c' or 'go' it sends almost every list to the first sector.

    Returns the sector code, or 'PRP-028' (Miscellaneous) when nothing matches.
    """
    text = f"{slug} {title} {description}".lower()
    for code, sector in SECTOR_MAP.items():
        if code == _FALLBACK_SECTOR:  # Misc is fallback
            continue
        if any(_tag_regex(tag).search(text) for tag in sector['tags']):
            return code
    return _FALLBACK_SECTOR


def build_data():
    """Rebuild the per-list JSON files and the sector index from REPO_DIR.

    Walks ``content/<org>/<repo>/README.md`` (or ``readme.md``), writes one
    ``<org>--<repo>.json`` file per list with at least one entry into
    OUTPUT_DIR, and writes the sector summary to OUTPUT_INDEX. Existing
    ``.json`` files in OUTPUT_DIR are removed first.

    Returns True on success, False when the content directory is missing.
    """
    content_dir = os.path.join(REPO_DIR, 'content')
    if not os.path.isdir(content_dir):
        log(f"ERROR: content dir not found: {content_dir}")
        return False

    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # Clear old data
    for stale in os.listdir(OUTPUT_DIR):
        if stale.endswith('.json'):
            os.remove(os.path.join(OUTPUT_DIR, stale))

    sectors = {
        code: {**info, 'code': code, 'lists': [], 'list_count': 0, 'total_entries': 0}
        for code, info in SECTOR_MAP.items()
    }
    total_lists = 0
    total_entries = 0

    # Walk content directory
    for org_dir in sorted(os.listdir(content_dir)):
        org_path = os.path.join(content_dir, org_dir)
        if not os.path.isdir(org_path):
            continue
        for repo_dir in sorted(os.listdir(org_path)):
            repo_path = os.path.join(org_path, repo_dir)
            readme_path = os.path.join(repo_path, 'README.md')
            if not os.path.isfile(readme_path):
                # Try readme.md lowercase
                readme_path = os.path.join(repo_path, 'readme.md')
                if not os.path.isfile(readme_path):
                    continue

            slug = f"{org_dir}--{repo_dir}"
            data = parse_readme(readme_path)
            if data is None or data['entry_count'] == 0:
                continue

            data['slug'] = slug
            sector_code = classify_list(slug, data['title'], data['description'])
            data['tag'] = sector_code

            # Save individual file
            out_file = os.path.join(OUTPUT_DIR, f"{slug}.json")
            with open(out_file, 'w', encoding='utf-8') as f:
                json.dump(data, f)

            # Add to sector index
            sector = sectors[sector_code]
            sector['lists'].append({
                'slug': slug,
                'title': data['title'],
                'description': data['description'][:200],
                'stars': data.get('stars', ''),
                'entry_count': data['entry_count'],
                'subcategory_count': data['subcategory_count']
            })
            sector['list_count'] += 1
            sector['total_entries'] += data['entry_count']
            total_lists += 1
            total_entries += data['entry_count']

    # Sort lists within sectors by entry count
    for sector in sectors.values():
        sector['lists'].sort(key=lambda x: x['entry_count'], reverse=True)

    # Build index (remove empty sectors)
    sector_list = [s for s in sectors.values() if s['list_count'] > 0]
    sector_list.sort(key=lambda x: x['code'])

    index = {
        'total_lists': total_lists,
        'total_entries': total_entries,
        'sector_count': len(sector_list),
        'sectors': sector_list
    }
    with open(OUTPUT_INDEX, 'w', encoding='utf-8') as f:
        json.dump(index, f)

    log(f"Built {total_lists} lists, {total_entries} entries across {len(sector_list)} sectors")
    return True


def restart_api():
    """Restart the jaeswift-api systemd unit; failure is logged, not raised."""
    log("Restarting API service...")
    try:
        subprocess.run(['systemctl', 'restart', 'jaeswift-api'],
                       check=True, capture_output=True)
        log("API restarted.")
    except (subprocess.CalledProcessError, OSError) as e:
        # Best effort: the data on disk is already rebuilt at this point.
        log(f"WARNING: Could not restart API: {e}")


def main():
    """Run one sync: pull the repo, rebuild the JSON data, restart the API.

    Exits with status 1 when the pull or the build fails.
    """
    log("=" * 60)
    log("AWESOMELIST SYNC STARTED")
    try:
        pull_repo()
        built = build_data()
    except subprocess.CalledProcessError as e:
        # Output is captured, so include stderr or the cause is invisible.
        stderr = (e.stderr or b'').decode('utf-8', errors='replace').strip()
        detail = f" | stderr: {stderr}" if stderr else ''
        log(f"SYNC FAILED: {e}{detail}")
        sys.exit(1)
    except Exception as e:
        log(f"SYNC FAILED: {e}")
        sys.exit(1)

    if not built:
        log("SYNC FAILED - build error")
        sys.exit(1)

    restart_api()
    log("SYNC COMPLETED SUCCESSFULLY")


if __name__ == '__main__':
    main()