jaeswift-website/api/agent_routes.py
jae e73b74cfa2 feat(agent): agentic chat with wallet auth and tiered model routing
- New /api/agent/chat endpoint with Venice tool-calling loop (max 8 iter)
- Tiered models: glm-4.7-flash default, kimi-k2-thinking for Elite+
- Wallet auth: /api/auth/{nonce,verify,whoami,logout} with Ed25519 + JWT
- 10 tools registered: site search, crypto prices, SITREP, .sol lookup,
  wallet xray, contraband/awesomelist search, changelog, trigger_effect
- Per-tool rate limits, 30s timeout, $30/mo budget guard
- Frontend: tier badge, tool call cards, wallet sign-in handshake
- Changelog v1.40.0
2026-04-20 10:40:27 +00:00

321 lines
12 KiB
Python

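"""Agentic chat endpoint for JAE-AI.
POST /api/agent/chat
{messages: [{role, content}], memory_context?, session_id?}
memory_context is prepended to the system prompt for non-anonymous tiers;
session_id is not read by the chat handler.
Runs an agent loop that calls the Venice API with tool-calling support.
Tier-routed model selection, per-(identity, tool) rate limiting,
monthly USD budget cap estimated from input and output token usage.
"""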
import json
import os
import time
import calendar
import datetime
import threading
from pathlib import Path
from flask import Blueprint, request, jsonify
import requests as req
from auth_routes import read_session, fetch_sol_balance, _load_apikeys
from agent_tiers import compute_tier, pick_model, tier_allows
from agent_tools import (
TOOLS, get_allowed_tools, openai_tool_schemas, check_rate,
)
# Blueprint carrying every route in this module; all paths are served
# under the /api/agent prefix.
agent_bp = Blueprint('agent_bp', __name__, url_prefix='/api/agent')
# 'data' directory that sits next to this module.
DATA_DIR = Path(__file__).parent / 'data'
# JSON ledger of monthly spend; read by _load_budget, written by _save_budget.
BUDGET_FILE = DATA_DIR / 'agent_budget.json'
# Venice API chat-completions endpoint posted to by _call_venice.
VENICE_URL = 'https://api.venice.ai/api/v1/chat/completions'
# Model pricing ($ per 1M tokens) — input, output.
# Models not listed here are costed at (0.5, 1.0) by _estimate_cost.
MODEL_PRICING = {
'zai-org-glm-4.7-flash': (0.125, 0.50),
'kimi-k2-thinking': (0.75, 3.20),
}
# Monthly spend cap in USD; _budget_exceeded compares the month total to it.
MONTHLY_BUDGET_USD = 30.0
# Upper bound on Venice round-trips in one agent_chat request.
MAX_ITERATIONS = 8
MAX_MESSAGES = 40 # only the last MAX_MESSAGES client messages are forwarded to Venice
# Held by _record_usage around its read-modify-write of the budget ledger.
_budget_lock = threading.Lock()
# Base system prompt sent as the first message of every chat request.
# It is a str.format template: agent_chat fills the single {date}
# placeholder with today's ISO date, so any literal braces added here
# must be doubled.
SYSTEM_PROMPT = (
'You are JAE-AI, the agentic AI operator embedded at jaeswift.xyz — a military / cyberpunk '
'themed command centre. You speak like a tactical operator: terse, confident, a little cheeky. '
'You have access to tools to search the site, fetch live crypto prices, scan Solana wallets, '
'look up .sol domains, fetch SITREP briefings, and trigger sitewide visual effects on the '
"user's browser. Use tools proactively when helpful — don't just describe what you could do, "
'do it. After a tool returns, briefly explain the result in plain English. Keep replies under '
'200 words unless the user asks for detail. Never invent tool results. British English, occasional '
'profanity is fine, never censor words. Current date: {date}.'
)
def _load_budget() -> dict:
    """Read the persisted budget ledger from ``BUDGET_FILE``.

    Returns:
        The decoded JSON object keyed by month. An empty dict is returned
        when the file is missing, unreadable, not valid JSON, or holds
        valid JSON that is not an object, so callers can always use dict
        methods on the result.
    """
    try:
        with open(BUDGET_FILE, encoding='utf-8') as f:
            ledger = json.load(f)
    except (OSError, ValueError):
        # Best-effort read: a missing or corrupt ledger is treated as
        # "nothing spent yet". JSONDecodeError and UnicodeDecodeError are
        # both ValueError subclasses.
        return {}
    # Guard against a well-formed but wrongly-shaped file (e.g. ``[]``).
    return ledger if isinstance(ledger, dict) else {}
def _save_budget(b: dict) -> None:
    """Persist the budget ledger to ``BUDGET_FILE`` atomically.

    The ledger is serialised to a sibling temporary file and then moved
    over the real file with ``os.replace``. Readers therefore see either
    the previous ledger or the new one, never a half-written file, and a
    failed serialisation leaves the previous ledger untouched.

    Args:
        b: The full ledger (month key -> usage record) to write.

    Failures are reported on stdout and otherwise swallowed: saving is
    best-effort and must not break the request that triggered it.
    """
    # The PID suffix keeps separate worker processes from sharing a temp file.
    tmp_path = BUDGET_FILE.with_name(f'{BUDGET_FILE.name}.{os.getpid()}.tmp')
    try:
        BUDGET_FILE.parent.mkdir(parents=True, exist_ok=True)
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(b, f, indent=2)
        os.replace(tmp_path, BUDGET_FILE)
    except Exception as e:
        print(f'[agent] budget save failed: {e}')
        # Do not leave a stale partial temp file behind.
        try:
            tmp_path.unlink(missing_ok=True)
        except OSError:
            pass
def _current_month_key() -> str:
    """Return the current UTC month as ``'YYYY-MM'``.

    The value is used as the per-month key in the budget ledger. An aware
    UTC ``now()`` is used instead of the deprecated ``datetime.utcnow()``;
    the formatted result is the same.
    """
    return datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m')
def _estimate_cost(model: str, in_tokens: int, out_tokens: int) -> float:
    """Estimate the USD cost of one model call.

    Args:
        model: Model identifier looked up in ``MODEL_PRICING``.
        in_tokens: Prompt tokens consumed.
        out_tokens: Completion tokens produced.

    Returns:
        Cost in USD. Rates are dollars per one million tokens; a model
        missing from ``MODEL_PRICING`` is charged at 0.5 (input) and
        1.0 (output).
    """
    fallback_rates = (0.5, 1.0)
    input_rate, output_rate = MODEL_PRICING.get(model, fallback_rates)
    weighted_tokens = in_tokens * input_rate + out_tokens * output_rate
    return weighted_tokens / 1_000_000
def _record_usage(model: str, in_tokens: int, out_tokens: int) -> tuple[float, float]:
    """Add one call's token usage to the current month's ledger entry.

    The ledger is loaded, updated and saved while holding ``_budget_lock``
    so concurrent requests in this process do not overwrite each other.

    Args:
        model: Model identifier the tokens were spent on.
        in_tokens: Prompt tokens to add.
        out_tokens: Completion tokens to add.

    Returns:
        ``(month_total_usd, remaining_usd)`` after the update, where the
        remainder is clamped at zero.
    """
    with _budget_lock:
        ledger = _load_budget()
        bucket = ledger.setdefault(
            _current_month_key(),
            {'total_usd': 0.0, 'calls': 0, 'by_model': {}},
        )
        spend = _estimate_cost(model, in_tokens, out_tokens)

        # Month-wide totals (USD kept to six decimal places).
        bucket['total_usd'] = round(bucket['total_usd'] + spend, 6)
        bucket['calls'] = bucket['calls'] + 1

        # Per-model breakdown.
        per_model = bucket['by_model'].setdefault(
            model,
            {'calls': 0, 'in_tokens': 0, 'out_tokens': 0, 'usd': 0.0},
        )
        per_model['calls'] = per_model['calls'] + 1
        per_model['in_tokens'] = per_model['in_tokens'] + in_tokens
        per_model['out_tokens'] = per_model['out_tokens'] + out_tokens
        per_model['usd'] = round(per_model['usd'] + spend, 6)

        _save_budget(ledger)
        month_total = bucket['total_usd']
        remaining = max(0.0, MONTHLY_BUDGET_USD - month_total)
        return month_total, remaining
def _budget_exceeded() -> bool:
    """Report whether this month's recorded spend has reached the cap.

    Returns:
        True when the current month's ``total_usd`` in the ledger is at or
        above ``MONTHLY_BUDGET_USD``; a month with no entry counts as 0.
    """
    ledger = _load_budget()
    this_month = ledger.get(_current_month_key(), {})
    spent = this_month.get('total_usd', 0)
    return spent >= MONTHLY_BUDGET_USD
def _identity_key() -> str:
    """Build the identity string used to key per-tool rate limits.

    Returns:
        ``'wallet:<address>'`` when the session carries a wallet address,
        otherwise ``'ip:<addr>'`` using the first ``X-Forwarded-For``
        entry, then ``request.remote_addr``, then ``'unknown'``.
    """
    sess = read_session()
    if sess and sess.get('address'):
        return f"wallet:{sess['address']}"

    # An empty or whitespace-only header must not collapse every such
    # client into the single identity 'ip:'; fall through to the socket
    # address instead.
    # NOTE(review): X-Forwarded-For is supplied by the client unless a
    # trusted proxy overwrites it — confirm the deployment does so,
    # otherwise anonymous rate limits can be dodged by varying the header.
    forwarded = request.headers.get('X-Forwarded-For', '')
    client_ip = forwarded.split(',')[0].strip() or request.remote_addr or 'unknown'
    return f'ip:{client_ip}'
def _execute_tool(name: str, args: dict, identity: str) -> dict:
    """Run one registered tool on behalf of an identity.

    Args:
        name: Tool name as requested by the model.
        args: Parsed tool arguments; a falsy value is passed on as ``{}``.
        identity: Caller identity, combined with ``name`` as the
            rate-limit key.

    Returns:
        Whatever the tool handler returns, or ``{'error': ...}`` when the
        tool is unknown, the rate limit is hit, or the handler raises.
    """
    entry = TOOLS.get(name)
    if not entry:
        return {'error': f'unknown tool: {name}'}

    # Rate limits are tracked per (identity, tool) pair.
    within_limit = check_rate((identity, name), entry.rate_limit)
    if not within_limit:
        return {'error': f'rate limit exceeded for {name} ({entry.rate_limit})'}

    # A misbehaving tool is reported back to the model, never raised.
    try:
        outcome = entry.handler(args or {})
    except Exception as exc:
        outcome = {'error': f'tool {name} failed: {exc}'}
    return outcome
def _call_venice(model: str, messages: list, tools_schema: list, api_key: str) -> dict:
    """POST one chat-completion request to Venice and return the JSON body.

    Args:
        model: Model identifier to request.
        messages: Full conversation, system message first.
        tools_schema: Tool definitions offered to the model; may be empty.
        api_key: Venice API key sent as a Bearer token.

    Returns:
        The decoded JSON response.

    Raises:
        RuntimeError: If Venice answers with a non-200 status; the message
            carries the status and the first 400 characters of the body.
    """
    payload = {
        'model': model,
        'messages': messages,
        'temperature': 0.7,
        'max_tokens': 1500,
    }
    # Only advertise tools when there are some: leaving the keys out is
    # safer than sending explicit nulls for `tools` / `tool_choice`.
    if tools_schema:
        payload['tools'] = tools_schema
        payload['tool_choice'] = 'auto'

    resp = req.post(
        VENICE_URL,
        headers={'Authorization': f'Bearer {api_key}', 'Content-Type': 'application/json'},
        json=payload,
        timeout=90,
    )
    if resp.status_code != 200:
        raise RuntimeError(f'Venice HTTP {resp.status_code}: {resp.text[:400]}')
    return resp.json()
def _sanitise_messages(messages_in: list) -> list:
    """Keep the last MAX_MESSAGES client messages, filtered to known roles and fields."""
    allowed_roles = ('user', 'assistant', 'system', 'tool')
    allowed_fields = ('role', 'content', 'tool_calls', 'tool_call_id', 'name')
    cleaned = []
    for m in messages_in[-MAX_MESSAGES:]:
        # The body is untrusted JSON: entries that are not objects are
        # skipped rather than crashing on `.get`.
        # NOTE(review): 'system' and 'tool' roles are accepted from the
        # client, so a caller can inject its own instructions — confirm
        # this is intended.
        if not isinstance(m, dict) or m.get('role') not in allowed_roles:
            continue
        cleaned.append({k: v for k, v in m.items() if k in allowed_fields})
    return cleaned


def _build_system_prompt(tier: str, memory_context: str) -> str:
    """Render SYSTEM_PROMPT with today's date, memory context and the tier banner."""
    sys_prompt = SYSTEM_PROMPT.format(date=datetime.date.today().isoformat())
    # Client-supplied memory is only honoured for non-anonymous tiers.
    if memory_context and tier != 'anonymous':
        sys_prompt = memory_context + '\n\n' + sys_prompt
    if tier == 'elite':
        sys_prompt += '\n\n[TIER: ELITE — $JAE holder. Greet them accordingly.]'
    elif tier == 'admin':
        sys_prompt += '\n\n[TIER: ADMIN — this is Jae, the site operator.]'
    elif tier == 'operator':
        sys_prompt += '\n\n[TIER: OPERATOR — wallet authenticated.]'
    return sys_prompt


def _parse_tool_args(raw_args) -> dict:
    """Decode a tool call's `arguments` into a dict; anything unusable becomes {}."""
    if not raw_args:
        return {}
    if isinstance(raw_args, str):
        try:
            raw_args = json.loads(raw_args)
        except ValueError:
            return {}
    # Tool handlers expect a mapping; a JSON list or scalar is treated the
    # same way as undecodable arguments.
    return raw_args if isinstance(raw_args, dict) else {}


@agent_bp.route('/chat', methods=['POST'])
def agent_chat():
    """Handle POST /api/agent/chat: run the tool-calling agent loop.

    Request JSON: ``messages`` (non-empty list of ``{role, content, ...}``)
    and optional ``memory_context`` (string).

    Responses:
        200: ``content``, ``tool_calls`` trace, ``frontend_actions``,
            ``model_used``, ``tier``, ``authenticated``, ``address``,
            ``tokens`` and ``budget`` figures.
        400: missing or empty ``messages``, or no message with a valid role.
        429: the monthly budget has been reached.
        500: no Venice API key configured.
        502: the Venice call or the loop failed; includes the tool trace
            gathered so far.
    """
    # ── Budget guard ────────────────────────────────────────────────────
    # Checked first: it is a local file read, so over-budget requests are
    # rejected before any balance lookup is made.
    if _budget_exceeded():
        return jsonify({
            'error': 'Monthly agent budget exceeded — please try again next month',
            'budget_usd': MONTHLY_BUDGET_USD,
        }), 429

    # ── Input ───────────────────────────────────────────────────────────
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        # Valid JSON that is not an object (list, string, ...) is treated
        # like an empty body.
        data = {}
    messages_in = data.get('messages') or []
    if not isinstance(messages_in, list) or not messages_in:
        return jsonify({'error': 'messages array required'}), 400
    raw_memory = data.get('memory_context')
    memory_context = raw_memory.strip() if isinstance(raw_memory, str) else ''

    cleaned = _sanitise_messages(messages_in)
    if not cleaned:
        return jsonify({'error': 'no valid messages'}), 400

    # ── Identity / tier ─────────────────────────────────────────────────
    sess = read_session()
    if sess and sess.get('address'):
        address = sess['address']
        balance = fetch_sol_balance(address)
        tier = compute_tier(address, balance)
    else:
        address = None
        tier = 'anonymous'

    # ── System prompt, model, tools ─────────────────────────────────────
    messages = [{'role': 'system', 'content': _build_system_prompt(tier, memory_context)}] + cleaned
    model = pick_model(tier)
    allowed = get_allowed_tools(tier)
    tools_schema = openai_tool_schemas(allowed)

    # ── API key ─────────────────────────────────────────────────────────
    keys = _load_apikeys()
    api_key = (keys.get('venice') or {}).get('api_key', '')
    if not api_key:
        return jsonify({'error': 'Venice API key not configured'}), 500

    identity = _identity_key()
    tool_trace = []
    total_in = 0
    total_out = 0
    final_content = ''
    frontend_actions = []  # effects etc. to execute client-side

    try:
        for iteration in range(MAX_ITERATIONS):
            resp_json = _call_venice(model, messages, tools_schema, api_key)
            usage = resp_json.get('usage') or {}
            total_in += int(usage.get('prompt_tokens', 0) or 0)
            total_out += int(usage.get('completion_tokens', 0) or 0)

            choice = (resp_json.get('choices') or [{}])[0]
            msg = choice.get('message') or {}
            tool_calls = msg.get('tool_calls') or []

            # Append assistant turn so the next round sees its own tool calls.
            assistant_turn = {'role': 'assistant', 'content': msg.get('content') or ''}
            if tool_calls:
                assistant_turn['tool_calls'] = tool_calls
            messages.append(assistant_turn)

            if not tool_calls:
                # No tool requested: this is the final answer.
                final_content = msg.get('content') or msg.get('reasoning_content') or ''
                break

            # Execute tool calls and feed each result back as a 'tool' message.
            for tc in tool_calls:
                fn = tc.get('function') or {}
                name = fn.get('name', '')
                args = _parse_tool_args(fn.get('arguments'))
                result = _execute_tool(name, args, identity)
                tool_trace.append({
                    'name': name, 'args': args, 'result': result,
                    'iteration': iteration,
                })
                # Capture frontend trigger_effect action
                if isinstance(result, dict) and result.get('action') == 'trigger_effect':
                    frontend_actions.append({
                        'action': 'trigger_effect',
                        'effect': result.get('effect'),
                    })
                messages.append({
                    'role': 'tool',
                    'tool_call_id': tc.get('id'),
                    'name': name,
                    # Tool output is truncated to 4000 characters.
                    'content': json.dumps(result, default=str)[:4000],
                })
        else:
            # Loop ran out without the model giving a tool-free answer.
            final_content = final_content or '(agent reached iteration limit)'
    except Exception as e:
        # Rounds completed before the failure still consumed tokens;
        # record them so the monthly cap is not undercounted.
        if total_in or total_out:
            try:
                _record_usage(model, total_in, total_out)
            except Exception as rec_err:
                print(f'[agent] usage record failed: {rec_err}')
        return jsonify({
            'error': f'agent failure: {e}',
            'tool_calls': tool_trace,
            'tier': tier,
            'model_used': model,
        }), 502

    # ── Record usage ────────────────────────────────────────────────────
    month_total, remaining = _record_usage(model, total_in, total_out)
    return jsonify({
        'content': final_content,
        'tool_calls': tool_trace,
        'frontend_actions': frontend_actions,
        'model_used': model,
        'tier': tier,
        'authenticated': bool(address),
        'address': address,
        'tokens': {'in': total_in, 'out': total_out},
        'budget': {
            'month_usd': round(month_total, 4),
            'remaining_usd': round(remaining, 4),
            'cap_usd': MONTHLY_BUDGET_USD,
        },
    })
@agent_bp.route('/tools', methods=['GET'])
def list_tools():
    """Debug endpoint: report the caller's tier, model and permitted tools.

    A session with a wallet address is tiered from its SOL balance;
    any other caller is treated as 'anonymous'.
    """
    sess = read_session()
    address = sess.get('address') if sess else None
    if address:
        tier = compute_tier(address, fetch_sol_balance(address))
    else:
        tier = 'anonymous'

    permitted = get_allowed_tools(tier)
    model = pick_model(tier)
    tool_rows = []
    for tool in permitted:
        tool_rows.append({
            'name': tool.name,
            'tier': tool.tier,
            'rate_limit': tool.rate_limit,
            'description': tool.description,
        })
    return jsonify({'tier': tier, 'model': model, 'tools': tool_rows})
@agent_bp.route('/budget', methods=['GET'])
def budget_status():
    """Report the current month's spend against the monthly cap.

    Returns JSON with ``month``, ``cap_usd``, ``total_usd``,
    ``remaining_usd`` (clamped at zero), ``calls`` and ``by_model``.
    A month with no ledger entry is reported as zero usage.
    """
    # Resolve the month key once so the reported `month` always names the
    # same bucket the figures were read from, even at a UTC month rollover.
    month_key = _current_month_key()
    month = _load_budget().get(month_key, {'total_usd': 0.0, 'calls': 0, 'by_model': {}})
    total_usd = month.get('total_usd', 0)
    return jsonify({
        'month': month_key,
        'cap_usd': MONTHLY_BUDGET_USD,
        'total_usd': round(total_usd, 4),
        'remaining_usd': round(max(0.0, MONTHLY_BUDGET_USD - total_usd), 4),
        'calls': month.get('calls', 0),
        'by_model': month.get('by_model', {}),
    })