fix: switch Reddit feeds to RSS to avoid 403 blocks

This commit is contained in:
jae 2026-04-06 15:24:20 +00:00
parent 5d983ba86e
commit 371e8cbb7d

View file

@@ -55,15 +55,18 @@ RSS_FEEDS = {
# Subreddit feeds, keyed by an internal identifier. Each entry points at the
# subreddit's RSS listing; the RSS endpoints are used instead of the .json
# API to avoid 403 blocks. Each entry carries exactly one 'url' key, so the
# endpoint that is actually used is unambiguous.
REDDIT_FEEDS = {
    'r_technology': {
        'url': 'https://www.reddit.com/r/technology/hot.rss?limit=30',
        'type': 'rss',
        'label': 'r/technology',
    },
    'r_programming': {
        'url': 'https://www.reddit.com/r/programming/hot.rss?limit=30',
        'type': 'rss',
        'label': 'r/programming',
    },
    'r_netsec': {
        'url': 'https://www.reddit.com/r/netsec/hot.rss?limit=20',
        'type': 'rss',
        'label': 'r/netsec',
    },
}
@@ -98,28 +101,22 @@ def fetch_hn_stories():
def fetch_reddit_posts(sub_key, sub_config):
"""Fetch Reddit posts via .json endpoint."""
"""Fetch Reddit posts via RSS feed."""
posts = []
try:
resp = requests.get(sub_config['url'], headers=REDDIT_HEADERS, timeout=15)
resp.raise_for_status()
data = resp.json()
children = data.get('data', {}).get('children', [])
for child in children:
d = child.get('data', {})
if d.get('stickied'):
continue
created = d.get('created_utc', 0)
pub = datetime.fromtimestamp(created, tz=timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ') if created else ''
feed = feedparser.parse(sub_config['url'])
for entry in feed.entries:
pub = ''
if hasattr(entry, 'published_parsed') and entry.published_parsed:
pub = time.strftime('%Y-%m-%dT%H:%M:%SZ', entry.published_parsed)
elif hasattr(entry, 'updated_parsed') and entry.updated_parsed:
pub = time.strftime('%Y-%m-%dT%H:%M:%SZ', entry.updated_parsed)
posts.append({
'title': d.get('title', 'Untitled'),
'url': d.get('url', ''),
'title': entry.get('title', 'Untitled'),
'url': entry.get('link', ''),
'source': sub_config['label'],
'published': pub,
'summary': (d.get('selftext', '') or '')[:300],
'score': d.get('score', 0),
'num_comments': d.get('num_comments', 0),
'permalink': f"https://reddit.com{d.get('permalink', '')}"
'summary': (entry.get('summary', '') or '')[:300]
})
log.info(f"Fetched {len(posts)} posts from {sub_config['label']}")
except Exception as e: