#!/usr/bin/env python3
"""AWESOMELIST Auto-Sync: pull the latest trackawesomelist data and rebuild JSON.

Pipeline (see ``main``):

1. ``pull_repo``   - clone or hard-reset the checkout in ``REPO_DIR``.
2. ``build_data``  - parse every ``content/<org>/<repo>/README.md``, write one
   JSON file per list into ``OUTPUT_DIR`` and a sector index to
   ``OUTPUT_INDEX``.
3. ``restart_api`` - best-effort restart of the ``jaeswift-api`` systemd unit.
"""
# Provenance: api/awesomelist_sync.py as added by patch dd1d5ad
# ("feat: add awesomelist auto-sync script").
import json
import os
import re
import subprocess
import sys
from datetime import datetime
from pathlib import Path  # noqa: F401 - unused here; kept from the original import list

REPO_URL = "https://github.com/trackawesomelist/trackawesomelist.git"
REPO_DIR = "/opt/awesomelist-source"
OUTPUT_DIR = "/var/www/jaeswift-homepage/api/data/awesomelist"
OUTPUT_INDEX = "/var/www/jaeswift-homepage/api/data/awesomelist_index.json"
LOG = "/var/log/awesomelist-sync.log"

# Sector code -> display name, icon and the tags that route a list into it.
# Icons are written as escapes so the file survives non-UTF-8 tooling.
# NOTE(review): the icons for PRP-002 and PRP-023 had lost their final byte in
# the reviewed text; U+1F310 (globe with meridians) and U+1F30D (earth globe)
# are best guesses - confirm against the intended design.
SECTOR_MAP = {
    'PRP-001': {'name': 'PROGRAMMING LANGUAGES', 'icon': '\U0001F4BB', 'tags': [
        'python', 'go', 'rust', 'javascript', 'typescript', 'ruby', 'java',
        'kotlin', 'swift', 'dart', 'elixir', 'erlang', 'haskell', 'lua', 'perl',
        'php', 'scala', 'clojure', 'crystal', 'nim', 'zig', 'v', 'ocaml', 'r',
        'julia', 'fortran', 'pascal', 'ada', 'c', 'cpp', 'csharp', 'fsharp',
        'groovy', 'elm', 'purescript', 'idris', 'coq', 'vala', 'actionscript',
        'autohotkey', 'autoit', 'commonlisp', 'clojurescript', 'qsharp', 'd',
        'eta', 'frege']},
    'PRP-002': {'name': 'WEB FRONTEND', 'icon': '\U0001F310', 'tags': [
        'react', 'vue', 'angular', 'svelte', 'css', 'html', 'tailwind',
        'bootstrap', 'sass', 'less', 'webpack', 'vite', 'nextjs', 'nuxt',
        'gatsby', 'preact', 'ember', 'backbone', 'knockout', 'cyclejs', 'choo',
        'mithril', 'polymer', 'lit', 'storybook', 'draft-js', 'redux', 'relay',
        'graphql', 'webcomponent', 'ant-design', 'material-ui', 'flexbox',
        'web-animation', 'motion-ui', 'progressive-web', 'service-worker',
        'web-extension', 'browserify', 'yew', 'seed-rs', 'aurelia',
        'marionette', 'dojo', 'jquery', 'inertia']},
    'PRP-003': {'name': 'WEB BACKEND', 'icon': '\u2699\ufe0f', 'tags': [
        'nodejs', 'django', 'flask', 'rails', 'laravel', 'symfony', 'express',
        'fastapi', 'fiber', 'vapor', 'phoenix', 'spring', 'dropwizard',
        'vert.x', 'play1', 'cakephp', 'phalcon', 'lumen', 'slim', 'pyramid',
        'wagtail', 'directus', 'refinery', 'umbraco', 'sitecore', 'drupal',
        'wordpress', 'plone', 'silverstripe', 'craft', 'magento', 'rest',
        'microservice', 'serverless', 'jamstack', 'meteor', 'deno', 'npm',
        'gulp', 'eslint']},
    'PRP-004': {'name': 'MOBILE DEVELOPMENT', 'icon': '\U0001F4F1', 'tags': [
        'android', 'ios', 'flutter', 'react-native', 'ionic', 'cordova',
        'capacitor', 'xamarin', 'appium', 'swift-playground']},
    'PRP-005': {'name': 'GAMING & GAME DEV', 'icon': '\U0001F3AE', 'tags': [
        'gamedev', 'godot', 'unity', 'libgdx', 'love2d', 'pico-8', 'chip-8',
        'flame', 'playcanvas', 'haxe-gamedev', 'gideros', 'game-engine',
        'game-dataset', 'game-remake', 'open-source-game', 'games-of-coding',
        'game-talk', 'ironsworn', 'minecraft', 'board-game', 'pokemon', 'chess',
        'esports', 'pixel-art', 'gbdev', 'dos', 'frc']},
    'PRP-006': {'name': 'AI & MACHINE LEARNING', 'icon': '\U0001F916', 'tags': [
        'machine-learning', 'deep-learning', 'tensorflow', 'pytorch', 'jax',
        'nlp', 'computer-vision', 'chatgpt', 'gpt3', 'generative', 'langchain',
        'ai-tool', 'ai-finance', 'ai4lam', 'coreml', 'artificial-intelligence',
        'deep-vision', 'xai', 'awesome-ai', 'gemini-cli']},
    'PRP-007': {'name': 'DATA SCIENCE & ANALYTICS', 'icon': '\U0001F4CA', 'tags': [
        'datascience', 'data-engineering', 'bigdata', 'analytics', 'streaming',
        'spark', 'hadoop', 'polars', 'dash', 'jupyter', 'dataviz', 'json',
        'csv', 'json-dataset', 'information-retrieval', 'quantified-self',
        'quant']},
    'PRP-008': {'name': 'CLOUD & DEVOPS', 'icon': '\u2601\ufe0f', 'tags': [
        'docker', 'kubernetes', 'terraform', 'ansible', 'aws', 'azure', 'gcp',
        'cloudflare', 'digitalocean', 'ibmcloud', 'heroku', 'ci', 'cd', 'sre',
        'devsecops', 'saltstack', 'vagrant', 'kustomize', 'opentofu', 'cdk',
        'k6', 'pulumi', 'container']},
    'PRP-009': {'name': 'DATABASES', 'icon': '\U0001F5C4\ufe0f', 'tags': [
        'postgres', 'mysql', 'mongodb', 'redis', 'neo4j', 'cassandra',
        'couchdb', 'rethinkdb', 'influxdb', 'hbase', 'tdengine', 'nosql',
        'db-tool', 'sql']},
    'PRP-010': {'name': 'SECURITY & PRIVACY', 'icon': '\U0001F512', 'tags': [
        'security', 'hacking', 'pentest', 'ctf', 'malware', 'honeypot',
        'incident-response', 'crypto', 'cryptography', 'privacy', 'appsec',
        'vehicle-security', 'web-security', 'lockpicking', 'osint', 'fuzzing',
        'evm-security', 'blueteam', 'gdpr', 'pci-dss']},
    'PRP-011': {'name': 'SYSTEMS & PLATFORMS', 'icon': '\U0001F5A5\ufe0f', 'tags': [
        'linux', 'macos', 'windows', 'bsd', 'dos', 'raspberry-pi', 'wsl', 'nix',
        'arch', 'kde', 'gnome', 'qgis', 'qubes', 'amazon-alexa',
        'actions-on-google', 'home-assistant', 'smart-tv', 'fuse', 'ros2']},
    'PRP-012': {'name': 'DEVELOPER TOOLS', 'icon': '\U0001F6E0\ufe0f', 'tags': [
        'git', 'vim', 'neovim', 'emacs', 'vscode', 'atom', 'jetbrains',
        'sublime', 'devenv', 'devtools', 'shell', 'zsh', 'fish', 'tmux',
        'cli-app', 'terminal', 'powershell', 'bash', 'dtrace', 'cmake',
        'composer', 'alfred', 'scriptable', 'pinned-gist', 'code-review',
        'git-addon', 'git-hook', 'github']},
    'PRP-013': {'name': 'PACKAGE MANAGERS & BUILD', 'icon': '\U0001F4E6', 'tags': [
        'npm', 'webpack', 'gulp', 'rollup', 'esbuild', 'micro-npm',
        'npm-script', 'awesome-lint']},
    'PRP-014': {'name': 'TESTING & QA', 'icon': '\U0001F9EA', 'tags': [
        'testing', 'selenium', 'playwright', 'ava', 'tap', 'regression',
        'gatling', 'jmeter', 'static-analysis', 'qa']},
    'PRP-015': {'name': 'SOFTWARE ARCHITECTURE', 'icon': '\U0001F3D7\ufe0f', 'tags': [
        'design-pattern', 'ddd', 'software-architecture', 'microservice',
        'functional-programming', 'recursion-scheme']},
    'PRP-016': {'name': 'IoT & HARDWARE', 'icon': '\U0001F50C', 'tags': [
        'iot', 'embedded', 'arduino', 'esp', 'circuitpython', 'adafruit',
        'micropython', 'raspberry', 'robot', 'lidar', 'open-hardware',
        'electronics', 'beacon', 'mqtt', 'fpga']},
    'PRP-017': {'name': 'BLOCKCHAIN & CRYPTO', 'icon': '\u26d3\ufe0f', 'tags': [
        'blockchain', 'bitcoin', 'ethereum', 'solana', 'algorand', 'ripple',
        'corda', 'substrate', 'stacks-chain', 'golem', 'eosio', 'waves',
        'non-financial-blockchain', 'crypto-paper', 'coin']},
    'PRP-018': {'name': 'SCIENCE & RESEARCH', 'icon': '\U0001F9EC', 'tags': [
        'science', 'math', 'physics', 'bioinformatics', 'computational-biology',
        'neuroscience', 'cheminformatics', 'bioie', 'parasite', 'agriculture',
        'cropsteering', 'scientific-computing', 'scientific-writing',
        'research', 'latex', 'tikz']},
    'PRP-019': {'name': 'EDUCATION & LEARNING', 'icon': '\U0001F4DA', 'tags': [
        'education', 'learn', 'courses', 'tutorial', 'programming-for-kids',
        'educational-game', 'computer-science', 'competitive-programming',
        'algorithm', 'kata', 'interview', 'roadmap', 'free-programming-book',
        'beginner', 'talk', 'tech-video']},
    'PRP-020': {'name': 'DESIGN & UI/UX', 'icon': '\U0001F3A8', 'tags': [
        'design', 'ui', 'ux', 'design-system', 'design-principle', 'web-design',
        'product-design', 'sketch', 'framer', 'creative-coding', 'canvas',
        'webgl', 'vulkan', 'opengl', 'charting', 'd3', 'colorful', 'font',
        'icon']},
    'PRP-021': {'name': 'MEDIA & CONTENT', 'icon': '\U0001F3AC', 'tags': [
        'video', 'audio', 'music', 'podcast', 'broadcasting', 'ffmpeg', 'vlc',
        'webaudio', 'audio-visualization', 'photography', 'gif',
        'creative-tech', 'audiovisual', 'pixel-art']},
    'PRP-022': {'name': 'BUSINESS & CAREER', 'icon': '\U0001F4BC', 'tags': [
        'business', 'startup', 'indie', 'product-management',
        'project-management', 'okr', 'leading', 'managing', 'remote-job',
        'job-board', 'internship', 'freelance', 'marketing', 'billing',
        'amazon-seller', 'social-enterprise', 'open-company', 'speaking',
        'developer-first']},
    'PRP-023': {'name': 'COMMUNITY & CULTURE', 'icon': '\U0001F30D', 'tags': [
        'diversity', 'for-girls', 'mental-health', 'accessibility',
        'humane-tech', 'earth', 'clean-tech', 'veganism', 'theravada',
        'uncopyright', 'ad-free', 'free-software', 'open-source-supporter',
        'maintainer', 'patreon', 'naming', 'falsehood', 'answer', 'ama',
        'speaker', 'event', 'conference', 'italy-event', 'netherlands-event',
        'european-tech']},
    'PRP-024': {'name': 'NETWORKING & COMMS', 'icon': '\U0001F4E1', 'tags': [
        'network', 'sdn', 'pcap', 'snmp', 'irc', 'mastodon', 'slack', 'discord',
        'email', 'rtc', 'connectivity', 'ssh', 'radio', 'hacker-news',
        'chatops', 'chat', 'bot']},
    'PRP-025': {'name': 'UTILITIES & PRODUCTIVITY', 'icon': '\U0001F527', 'tags': [
        'productivity', 'selfhosted', 'sysadmin', 'tool', 'lowcode', 'no-login',
        'calculator', 'userscript', 'boilerplate', 'building-block',
        'pagespeed', 'readme', 'htaccess', 'stock-resource', 'creative-commons',
        'ponyfill', 'promise', 'observable', 'workflow-automation',
        'distraction-blocker']},
    'PRP-026': {'name': 'CONTENT MANAGEMENT', 'icon': '\U0001F4C4', 'tags': [
        'cms', 'markdown', 'text-editing', 'book-authoring', 'blog',
        'newsletter', 'rss', 'web-archiving', 'digital-history',
        'open-source-document']},
    'PRP-027': {'name': 'HEALTH & WELLNESS', 'icon': '\U0001F3E5', 'tags': [
        'health', 'healthcare', 'glp1', 'mental-health', 'biomedical',
        'digital-health']},
    'PRP-028': {'name': 'MISCELLANEOUS', 'icon': '\U0001F4E6', 'tags': []},
}

# Sector that receives every list no tag matches.
FALLBACK_SECTOR = 'PRP-028'

# Tags this short ('c', 'r', 'go', 'ci', ...) must match a whole token;
# as bare substrings they would match almost any text.
_SHORT_TAG_MAX = 3

# Headings whose links are navigation or boilerplate rather than list entries.
_SKIPPED_HEADERS = frozenset({
    'contents', 'table of contents', 'toc', 'license', 'contributing',
    'footnotes',
})

_LINK_RE = re.compile(r'\[([^\]]+)\]\(([^)]+)\)')
_HEADER_RE = re.compile(r'^(#{2,6})\s+(.+)')
_HEADER_LINK_RE = re.compile(r'\s*\[.*?\]\(.*?\)')
# Optional separator (hyphen, en dash, em dash or colon) before a description.
_ENTRY_DESC_RE = re.compile(r'\s*[-\u2013\u2014:]?\s*(.+)')


def _build_tag_matchers(sector_map):
    """Return ``(tag, sector_code, compiled_regex)`` tuples, longest tag first.

    Every tag must start on a token boundary (not preceded by a letter, digit
    or apostrophe); tags of at most ``_SHORT_TAG_MAX`` characters must also
    end on one. Sorting is stable, so equally long tags keep sector order.
    """
    matchers = []
    for code, sector in sector_map.items():
        if code == FALLBACK_SECTOR:
            continue
        for tag in sector['tags']:
            pattern = r"(?<![a-z0-9'\u2019])" + re.escape(tag)
            if len(tag) <= _SHORT_TAG_MAX:
                pattern += r"(?![a-z0-9])"
            matchers.append((tag, code, re.compile(pattern)))
    matchers.sort(key=lambda item: len(item[0]), reverse=True)
    return matchers


_TAG_MATCHERS = _build_tag_matchers(SECTOR_MAP)


def log(msg):
    """Print a timestamped line and append it to the ``LOG`` file.

    A log file that cannot be written must not abort the sync, so that
    failure is reported on stderr and otherwise ignored.
    """
    ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    line = f"[{ts}] {msg}"
    print(line)
    try:
        with open(LOG, "a", encoding="utf-8") as f:
            f.write(line + "\n")
    except OSError as exc:
        print(f"[{ts}] WARNING: could not write to {LOG}: {exc}", file=sys.stderr)


def clean_name(name):
    """Strip a leading "Awesome"/"awesome" prefix (plus separators) from *name*.

    Returns *name* unchanged when stripping would leave nothing.
    """
    cleaned = re.sub(r'^[Aa]wesome[- _]*', '', name).strip()
    return cleaned if cleaned else name


def pull_repo():
    """Bring ``REPO_DIR`` up to date with ``origin/main``, cloning if needed.

    Raises:
        subprocess.CalledProcessError: if any git command exits non-zero.
    """
    if os.path.isdir(os.path.join(REPO_DIR, '.git')):
        log("Pulling latest changes...")
        subprocess.run(['git', '-C', REPO_DIR, 'fetch', '--all'],
                       check=True, capture_output=True)
        # Hard reset: local modifications in the checkout are discarded.
        subprocess.run(['git', '-C', REPO_DIR, 'reset', '--hard', 'origin/main'],
                       check=True, capture_output=True)
    else:
        log("Cloning repository...")
        os.makedirs(REPO_DIR, exist_ok=True)
        subprocess.run(['git', 'clone', '--depth', '1', REPO_URL, REPO_DIR],
                       check=True, capture_output=True)
    log("Repository updated.")


def _extract_title_and_description(lines):
    """Return ``(title, description)`` from README *lines*.

    The title is the text of the first ``# `` heading. The description is the
    first non-empty line after it that is not a heading, link or image; the
    search stops at the next deeper heading.
    """
    title = ''
    description = ''
    seen_title = False
    for line in lines:
        if line.startswith('# '):
            if not seen_title:
                title = line[2:].strip()
                seen_title = True
            continue
        if not seen_title:
            continue
        stripped = line.strip()
        if stripped.startswith('#'):
            break
        if stripped and not stripped.startswith(('[', '!')):
            description = stripped
            break
    return title, description


def parse_readme(filepath):
    """Parse a single awesome list README.md into structured data.

    Returns a dict with ``title``, ``description``, ``entry_count``,
    ``subcategory_count`` and ``subcategories`` (each with ``name``,
    ``parent`` and ``entries``), plus the placeholder keys ``github_url``,
    ``stars`` and ``name``, which are always empty strings. Returns ``None``
    when the file cannot be read.
    """
    try:
        with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
            lines = f.read().split('\n')
    except OSError:
        return None

    title, description = _extract_title_and_description(lines)

    subcategories = []
    current_sub = None
    skipping = False  # True while inside a Contents/License/... section

    for line in lines:
        stripped = line.strip()

        header_match = _HEADER_RE.match(stripped)
        if header_match:
            # Drop links from the heading; a heading that is only a link
            # (e.g. a dated heading) does not start a new section.
            header_name = _HEADER_LINK_RE.sub('', header_match.group(2).strip()).strip()
            if header_name:
                skipping = header_name.lower() in _SKIPPED_HEADERS
                if not skipping:
                    current_sub = {'name': header_name, 'parent': '', 'entries': []}
                    subcategories.append(current_sub)
            continue

        if skipping:
            continue

        # List items with links
        if not (stripped.startswith(('-', '*')) and '[' in stripped and '](' in stripped):
            continue
        link = _LINK_RE.search(stripped)
        if link is None:
            continue

        # Take the description from the end of the link itself, so a ')'
        # inside the link text (e.g. "[name (1k)](url)") is not mistaken
        # for the end of the URL.
        desc_match = _ENTRY_DESC_RE.match(stripped[link.end():])
        entry = {
            'name': link.group(1),
            'url': link.group(2),
            'description': desc_match.group(1).strip() if desc_match else '',
        }

        if current_sub is None:
            # Entries that appear before any heading.
            current_sub = {'name': 'General', 'parent': '', 'entries': []}
            subcategories.append(current_sub)
        current_sub['entries'].append(entry)

    # Filter empty subcategories
    subcategories = [s for s in subcategories if s['entries']]
    total_entries = sum(len(s['entries']) for s in subcategories)

    return {
        'title': clean_name(title),
        'description': description[:300],
        'github_url': '',
        'stars': '',
        'entry_count': total_entries,
        'subcategory_count': len(subcategories),
        'subcategories': subcategories,
        'name': ''
    }


def classify_list(slug, title, description):
    """Assign a list to a sector based on slug/title/description matching.

    The three fields are joined and lower-cased, then tested against every
    sector tag. The longest matching tag wins (so ``react-native`` beats
    ``react``); equally long tags resolve to the earlier sector. Returns
    ``FALLBACK_SECTOR`` when nothing matches.
    """
    text = f"{slug} {title} {description}".lower()
    for _tag, code, matcher in _TAG_MATCHERS:
        if matcher.search(text):
            return code
    return FALLBACK_SECTOR


def _write_json(path, payload):
    """Write *payload* as JSON to *path* via a temp file and atomic rename."""
    tmp_path = f"{path}.tmp"
    with open(tmp_path, 'w', encoding='utf-8') as f:
        json.dump(payload, f)
    os.replace(tmp_path, path)


def _find_readme(repo_path):
    """Return the README path inside *repo_path*, or ``None`` if absent."""
    for candidate in ('README.md', 'readme.md'):
        readme_path = os.path.join(repo_path, candidate)
        if os.path.isfile(readme_path):
            return readme_path
    return None


def build_data():
    """Rebuild the per-list JSON files and the sector index.

    Returns ``False`` (after logging) when ``REPO_DIR/content`` is missing,
    ``True`` otherwise. Fresh files replace old ones in place and stale files
    are removed only afterwards, so readers never see an empty directory.
    """
    content_dir = os.path.join(REPO_DIR, 'content')
    if not os.path.isdir(content_dir):
        log(f"ERROR: content dir not found: {content_dir}")
        return False

    os.makedirs(OUTPUT_DIR, exist_ok=True)

    sectors = {
        code: {**info, 'code': code, 'lists': [], 'list_count': 0, 'total_entries': 0}
        for code, info in SECTOR_MAP.items()
    }
    written = set()
    total_lists = 0
    total_entries = 0

    # Walk content/<org>/<repo>/README.md
    for org_dir in sorted(os.listdir(content_dir)):
        org_path = os.path.join(content_dir, org_dir)
        if not os.path.isdir(org_path):
            continue

        for repo_dir in sorted(os.listdir(org_path)):
            readme_path = _find_readme(os.path.join(org_path, repo_dir))
            if readme_path is None:
                continue

            data = parse_readme(readme_path)
            if data is None or data['entry_count'] == 0:
                continue

            slug = f"{org_dir}--{repo_dir}"
            sector_code = classify_list(slug, data['title'], data['description'])
            data['slug'] = slug
            data['tag'] = sector_code

            # Save individual file
            out_name = f"{slug}.json"
            _write_json(os.path.join(OUTPUT_DIR, out_name), data)
            written.add(out_name)

            # Add to sector index
            sector = sectors[sector_code]
            sector['lists'].append({
                'slug': slug,
                'title': data['title'],
                'description': data['description'][:200],
                'stars': data.get('stars', ''),
                'entry_count': data['entry_count'],
                'subcategory_count': data['subcategory_count']
            })
            sector['list_count'] += 1
            sector['total_entries'] += data['entry_count']

            total_lists += 1
            total_entries += data['entry_count']

    # Remove JSON files left over from lists that no longer exist.
    for name in os.listdir(OUTPUT_DIR):
        if name.endswith('.json') and name not in written:
            os.remove(os.path.join(OUTPUT_DIR, name))

    # Sort lists within sectors by entry count
    for sector in sectors.values():
        sector['lists'].sort(key=lambda x: x['entry_count'], reverse=True)

    # Build index (remove empty sectors)
    sector_list = [s for s in sectors.values() if s['list_count'] > 0]
    sector_list.sort(key=lambda x: x['code'])

    index = {
        'total_lists': total_lists,
        'total_entries': total_entries,
        'sector_count': len(sector_list),
        'sectors': sector_list
    }
    _write_json(OUTPUT_INDEX, index)

    log(f"Built {total_lists} lists, {total_entries} entries across {len(sector_list)} sectors")
    return True


def restart_api():
    """Restart the ``jaeswift-api`` systemd unit; failures are only logged."""
    log("Restarting API service...")
    try:
        subprocess.run(['systemctl', 'restart', 'jaeswift-api'],
                       check=True, capture_output=True)
        log("API restarted.")
    except (subprocess.CalledProcessError, OSError) as e:
        # Best effort: the data on disk is already rebuilt at this point.
        log(f"WARNING: Could not restart API: {e}")


def main():
    """Run the full sync; exit with status 1 on any failure."""
    log("="*60)
    log("AWESOMELIST SYNC STARTED")
    try:
        pull_repo()
        built = build_data()
    except Exception as e:  # top-level boundary: log, then fail the run
        log(f"SYNC FAILED: {e}")
        stderr = getattr(e, 'stderr', None)
        if stderr:
            # git output is captured as bytes; surface it for diagnosis.
            if isinstance(stderr, bytes):
                stderr = stderr.decode('utf-8', errors='replace')
            log(f"stderr: {stderr.strip()}")
        sys.exit(1)

    if not built:
        log("SYNC FAILED - build error")
        sys.exit(1)

    restart_api()
    log("SYNC COMPLETED SUCCESSFULLY")


if __name__ == '__main__':
    main()