fix: switch Reddit feeds to RSS to avoid 403 blocks
This commit is contained in:
parent
5d983ba86e
commit
371e8cbb7d
1 changed file with 17 additions and 20 deletions
|
|
@@ -55,15 +55,18 @@ RSS_FEEDS = {
|
|||
|
||||
REDDIT_FEEDS = {
|
||||
'r_technology': {
|
||||
'url': 'https://www.reddit.com/r/technology/hot.json?limit=30',
|
||||
'url': 'https://www.reddit.com/r/technology/hot.rss?limit=30',
|
||||
'type': 'rss',
|
||||
'label': 'r/technology'
|
||||
},
|
||||
'r_programming': {
|
||||
'url': 'https://www.reddit.com/r/programming/hot.json?limit=30',
|
||||
'url': 'https://www.reddit.com/r/programming/hot.rss?limit=30',
|
||||
'type': 'rss',
|
||||
'label': 'r/programming'
|
||||
},
|
||||
'r_netsec': {
|
||||
'url': 'https://www.reddit.com/r/netsec/hot.json?limit=20',
|
||||
'url': 'https://www.reddit.com/r/netsec/hot.rss?limit=20',
|
||||
'type': 'rss',
|
||||
'label': 'r/netsec'
|
||||
}
|
||||
}
|
||||
|
|
@@ -98,28 +101,22 @@ def fetch_hn_stories():
|
|||
|
||||
|
||||
def fetch_reddit_posts(sub_key, sub_config):
|
||||
"""Fetch Reddit posts via .json endpoint."""
|
||||
"""Fetch Reddit posts via RSS feed."""
|
||||
posts = []
|
||||
try:
|
||||
resp = requests.get(sub_config['url'], headers=REDDIT_HEADERS, timeout=15)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
children = data.get('data', {}).get('children', [])
|
||||
for child in children:
|
||||
d = child.get('data', {})
|
||||
if d.get('stickied'):
|
||||
continue
|
||||
created = d.get('created_utc', 0)
|
||||
pub = datetime.fromtimestamp(created, tz=timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ') if created else ''
|
||||
feed = feedparser.parse(sub_config['url'])
|
||||
for entry in feed.entries:
|
||||
pub = ''
|
||||
if hasattr(entry, 'published_parsed') and entry.published_parsed:
|
||||
pub = time.strftime('%Y-%m-%dT%H:%M:%SZ', entry.published_parsed)
|
||||
elif hasattr(entry, 'updated_parsed') and entry.updated_parsed:
|
||||
pub = time.strftime('%Y-%m-%dT%H:%M:%SZ', entry.updated_parsed)
|
||||
posts.append({
|
||||
'title': d.get('title', 'Untitled'),
|
||||
'url': d.get('url', ''),
|
||||
'title': entry.get('title', 'Untitled'),
|
||||
'url': entry.get('link', ''),
|
||||
'source': sub_config['label'],
|
||||
'published': pub,
|
||||
'summary': (d.get('selftext', '') or '')[:300],
|
||||
'score': d.get('score', 0),
|
||||
'num_comments': d.get('num_comments', 0),
|
||||
'permalink': f"https://reddit.com{d.get('permalink', '')}"
|
||||
'summary': (entry.get('summary', '') or '')[:300]
|
||||
})
|
||||
log.info(f"Fetched {len(posts)} posts from {sub_config['label']}")
|
||||
except Exception as e:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue