feat: contraband auto-sync with weekly cron, 2-col subcategories
This commit is contained in:
parent
fe3b32276e
commit
5ff7cc4592
1 changed files with 242 additions and 0 deletions
242
api/contraband_sync.py
Normal file
242
api/contraband_sync.py
Normal file
|
|
@ -0,0 +1,242 @@
|
|||
#!/usr/bin/env python3
"""CONTRABAND Auto-Sync — Pulls latest source data and rebuilds contraband.json"""
import os, re, json, subprocess, sys
from datetime import datetime

# Upstream git repository holding the curated markdown source files.
REPO_URL = "https://github.com/fmhy/edit.git"
# Local working copy of the upstream repo (cloned on first run, pulled after).
REPO_DIR = "/opt/contraband-source"
# Destination JSON file consumed by the website API.
OUTPUT = "/var/www/jaeswift-homepage/api/data/contraband.json"
# Append-only log file for each sync run.
LOG = "/var/log/contraband-sync.log"
|
||||
def log(msg):
    """Echo *msg* to stdout and append it, timestamped, to the sync log file."""
    stamped = f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] {msg}"
    print(stamped)
    with open(LOG, "a") as handle:
        handle.write(stamped + "\n")
|
||||
|
||||
# Maps a source markdown filename stem (slug) to its display metadata:
# a stable category code (CRT-NNN), an upper-case display name, and an icon.
# Files whose slug is not listed here get a fallback generated in
# parse_markdown_file.
CATEGORY_MAP = {
    'ai': {'code': 'CRT-001', 'name': 'AI TOOLS', 'icon': '🤖'},
    'video': {'code': 'CRT-002', 'name': 'STREAMING & VIDEO', 'icon': '📡'},
    'audio': {'code': 'CRT-003', 'name': 'AUDIO & MUSIC', 'icon': '🎧'},
    'gaming': {'code': 'CRT-004', 'name': 'GAMING', 'icon': '🎮'},
    'reading': {'code': 'CRT-005', 'name': 'READING & BOOKS', 'icon': '📚'},
    'torrenting': {'code': 'CRT-006', 'name': 'TORRENTING', 'icon': '🔻'},
    'downloading': {'code': 'CRT-007', 'name': 'DOWNLOADING', 'icon': '⬇️'},
    'educational': {'code': 'CRT-008', 'name': 'EDUCATIONAL', 'icon': '🎓'},
    'dev-tools': {'code': 'CRT-009', 'name': 'DEV TOOLS', 'icon': '⚙️'},
    'gaming-tools': {'code': 'CRT-010', 'name': 'GAMING TOOLS', 'icon': '🕹️'},
    'image-tools': {'code': 'CRT-011', 'name': 'IMAGE TOOLS', 'icon': '🖼️'},
    'video-tools': {'code': 'CRT-012', 'name': 'VIDEO TOOLS', 'icon': '🎬'},
    'internet-tools': {'code': 'CRT-013', 'name': 'INTERNET TOOLS', 'icon': '🌐'},
    'social-media-tools': {'code': 'CRT-014', 'name': 'SOCIAL MEDIA', 'icon': '📱'},
    'text-tools': {'code': 'CRT-015', 'name': 'TEXT TOOLS', 'icon': '📝'},
    'file-tools': {'code': 'CRT-016', 'name': 'FILE TOOLS', 'icon': '📁'},
    'system-tools': {'code': 'CRT-017', 'name': 'SYSTEM TOOLS', 'icon': '💻'},
    'storage': {'code': 'CRT-018', 'name': 'STORAGE & CLOUD', 'icon': '💾'},
    'privacy': {'code': 'CRT-019', 'name': 'PRIVACY & SECURITY', 'icon': '🔒'},
    'linux-macos': {'code': 'CRT-020', 'name': 'LINUX & MACOS', 'icon': '🐧'},
    'mobile': {'code': 'CRT-021', 'name': 'MOBILE', 'icon': '📲'},
    'misc': {'code': 'CRT-022', 'name': 'MISCELLANEOUS', 'icon': '📦'},
    'non-english': {'code': 'CRT-023', 'name': 'NON-ENGLISH', 'icon': '🌍'},
    'unsafe': {'code': 'CRT-024', 'name': 'UNSAFE SITES', 'icon': '⚠️'},
}
|
||||
|
||||
def parse_entry(line):
    """Parse a markdown bullet line into an entry dict, or None if unusable.

    The returned dict has keys: name, url, description, starred, extra_links.
    Headers, blank lines, and bullet-only lines yield None.
    """
    text = line.strip()
    if not text or text.startswith('#'):
        return None

    # Drop the leading "-" / "*" bullet marker.
    text = re.sub(r'^[\-\*]\s*', '', text)
    if not text:
        return None

    is_starred = text.startswith('⭐')
    if is_starred:
        text = text[1:].strip()

    entry = {'name': '', 'url': '', 'description': '', 'starred': is_starred, 'extra_links': []}

    # Main link must be at the very start of the line: [name](url)
    link = re.match(r'\[([^\]]+)\]\(([^)]+)\)', text)
    if link is None:
        # Plain text with no leading markdown link.
        entry['name'] = text
        remainder = ''
    else:
        entry['name'] = link.group(1).strip()
        entry['url'] = link.group(2).strip()
        remainder = text[link.end():].strip()

    if remainder:
        # Trim leading separator punctuation before the description/extras.
        remainder = re.sub(r'^[\s\-–—/,]+', '', remainder).strip()

        # Any further [name](url) pairs become extra links.
        entry['extra_links'] = [
            {'name': extra_name.strip(), 'url': extra_url.strip()}
            for extra_name, extra_url in re.findall(r'\[([^\]]+)\]\(([^)]+)\)', remainder)
        ]

        # Description is whatever text remains once links and
        # leading/trailing separators are stripped away.
        desc = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', '', remainder).strip()
        desc = re.sub(r'^[\s\-–—/,]+', '', desc).strip()
        desc = re.sub(r'[\s\-–—/,]+$', '', desc).strip()
        entry['description'] = desc

    if not entry['name'] and not entry['url']:
        return None

    return entry
|
||||
|
||||
def parse_markdown_file(filepath, cat_key):
    """Parse a single markdown file into structured category data.

    Splits the file into subcategories at markdown headers (#/##/###,
    optionally prefixed with ► or ▷) and collects bullet entries and
    note lines under the current subcategory. Category code/name/icon
    come from CATEGORY_MAP, with a generated fallback for unknown keys.

    Fix: removed the unused local ``level`` (header depth was computed
    but never read).
    """
    cat_info = CATEGORY_MAP.get(cat_key, {'code': f'CRT-{cat_key.upper()}', 'name': cat_key.upper(), 'icon': '📄'})

    with open(filepath, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    subcategories = []
    current_sub = None

    for line in lines:
        line_stripped = line.strip()

        # Skip frontmatter delimiters and the frontmatter keys we expect.
        if line_stripped == '---':
            continue
        if line_stripped.startswith('title:') or line_stripped.startswith('description:'):
            continue

        # Subcategory headers: # ► or ## or ## ▷
        # NOTE(review): this also matches the file's top-level H1 title,
        # which then becomes the first subcategory — confirm intended.
        header_match = re.match(r'^(#{1,3})\s*[►▷]?\s*(.+)', line_stripped)
        if header_match:
            name = header_match.group(2).strip()
            name = re.sub(r'[►▷]', '', name).strip()
            # Ignore "note"-style headers and one-character artifacts.
            if name and not name.lower().startswith('note') and len(name) > 1:
                current_sub = {'name': name, 'entries': [], 'notes': []}
                subcategories.append(current_sub)
            continue

        # Admonition/note lines ("!!! note ..." or "::: warning ...").
        if line_stripped.startswith('!!!') or line_stripped.startswith(':::'):
            note_text = re.sub(r'^[!:]+\s*(note|warning|tip|info)?\s*', '', line_stripped, flags=re.IGNORECASE).strip()
            if note_text and current_sub:
                current_sub['notes'].append(note_text)
            continue

        # Bullet entries ("- ..." or "* ...").
        if re.match(r'^[\-\*]\s', line_stripped):
            if current_sub is None:
                # Entries before any header land in a synthetic bucket.
                current_sub = {'name': 'General', 'entries': [], 'notes': []}
                subcategories.append(current_sub)

            entry = parse_entry(line_stripped)
            if entry:
                current_sub['entries'].append(entry)

    # Aggregate counts for the category summary.
    total_entries = sum(len(s['entries']) for s in subcategories)
    starred_count = sum(1 for s in subcategories for e in s['entries'] if e['starred'])

    return {
        'code': cat_info['code'],
        'name': cat_info['name'],
        'icon': cat_info['icon'],
        'slug': cat_key,
        'entry_count': total_entries,
        'starred_count': starred_count,
        'subcategory_count': len(subcategories),
        'subcategories': subcategories
    }
|
||||
|
||||
def sync():
    """Run one full sync cycle.

    Clones (first run) or fast-forward pulls the source repo, parses every
    category markdown file under ``docs/``, writes the aggregated JSON to
    OUTPUT, and restarts the API service so it picks up the new data.
    Exits with status 1 if the docs directory is missing after checkout.

    Fixes: ``key.replace('-', '-')`` was a no-op, so the key-normalisation
    step did nothing — presumably underscores were meant to become hyphens
    (TODO confirm against upstream filenames). The surrounding membership
    check assigned ``key`` in both of its fall-through outcomes, so it
    collapses to a single conditional expression. Git pull failures are now
    logged instead of being silently ignored.
    """
    log("Starting sync...")

    # Clone on first run; fast-forward pull thereafter.
    if os.path.exists(os.path.join(REPO_DIR, '.git')):
        log("Pulling latest...")
        result = subprocess.run(['git', '-C', REPO_DIR, 'pull', '--ff-only'], capture_output=True, text=True)
        log(f"Git pull: {result.stdout.strip()}")
        if result.returncode != 0:
            # Surface pull failures rather than silently rebuilding stale data.
            log(f"Git pull failed: {result.stderr.strip()}")
        if 'Already up to date' in result.stdout:
            log("No changes detected. Rebuilding anyway.")
    else:
        log("Cloning repo...")
        subprocess.run(['git', 'clone', '--depth', '1', REPO_URL, REPO_DIR], check=True)
        log("Clone complete.")

    docs_dir = os.path.join(REPO_DIR, 'docs')
    if not os.path.exists(docs_dir):
        log(f"ERROR: docs dir not found at {docs_dir}")
        sys.exit(1)

    # Map filenames to category keys, skipping known non-category pages.
    file_map = {}
    for fname in os.listdir(docs_dir):
        if not fname.endswith('.md'):
            continue
        key = fname.replace('.md', '').lower()
        # Skip non-category files
        if key in ('index', 'startpage', 'sandbox', 'posts', 'beginners-guide', 'feedback'):
            continue
        # Normalise underscores to hyphens so slugs match CATEGORY_MAP keys.
        key_norm = key.replace('_', '-')
        file_map[fname] = key_norm if key_norm in CATEGORY_MAP else key

    log(f"Found {len(file_map)} category files to parse")

    categories = []
    total_entries = 0
    total_starred = 0

    for fname, cat_key in sorted(file_map.items()):
        filepath = os.path.join(docs_dir, fname)
        try:
            cat = parse_markdown_file(filepath, cat_key)
            categories.append(cat)
            total_entries += cat['entry_count']
            total_starred += cat['starred_count']
            log(f" {cat['code']} {cat['name']}: {cat['entry_count']} entries ({cat['starred_count']} starred)")
        except Exception as e:
            # One unparseable file must not abort the whole sync.
            log(f" ERROR parsing {fname}: {e}")

    # Sort by code
    categories.sort(key=lambda c: c['code'])

    # Build output payload.
    output = {
        'source': 'curated',
        'last_updated': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'total_entries': total_entries,
        'total_starred': total_starred,
        'total_categories': len(categories),
        'categories': categories
    }

    # Write the JSON, creating the data directory if needed.
    os.makedirs(os.path.dirname(OUTPUT), exist_ok=True)
    with open(OUTPUT, 'w', encoding='utf-8') as f:
        json.dump(output, f, ensure_ascii=False)

    size_mb = os.path.getsize(OUTPUT) / (1024 * 1024)
    log(f"Output: {OUTPUT} ({size_mb:.1f} MB)")
    log(f"Total: {total_entries} entries, {total_starred} starred, {len(categories)} categories")

    # Restart the API so it serves the freshly built JSON.
    log("Restarting jaeswift-api...")
    result = subprocess.run(['systemctl', 'restart', 'jaeswift-api'], capture_output=True, text=True)
    if result.returncode == 0:
        log("API restarted successfully.")
    else:
        log(f"API restart failed: {result.stderr}")

    log("Sync complete.")
|
||||
# Script entry point: run a single sync when invoked directly (e.g. by cron).
if __name__ == '__main__':
    sync()
|
||||
Loading…
Add table
Reference in a new issue