feat: unified tool-server + Agent Zero-inspired system prompt
Some checks are pending
CI / build-check-test (push) Waiting to run

- Merge 3 servers into single tool-server.mjs on port 7700
  - HTTP API: POST /api/bash, /api/browser/*
  - WebSocket: /ws/terminal (xterm.js panel)
  - WebSocket: /ws/browser (live browser panel)
- SHARED Playwright instance between LLM browser tool and user panel
  - When AI navigates a page, user sees it live in browser panel
  - When user clicks in panel, AI tools see the same page state
- Remove standalone terminal-server.mjs (was :7701)
- Remove standalone browser-server.mjs (was :7702)
- Update browser-panel.ts: ws://localhost:7700/ws/browser
- Update terminal-panel.ts: ws://localhost:7700/ws/terminal
- Agent Zero-inspired system prompt with:
  - Structured problem-solving methodology (analyse/plan/execute/verify/report)
  - Clear tool usage rules (no tools for casual chat)
  - Detailed tool descriptions with usage guidance
  - Resourceful retry behaviour on failures
- npm run dev starts both vite + unified server via concurrently
This commit is contained in:
JAE 2026-03-27 04:13:17 +00:00
parent 4c09f71351
commit fedc60fd0f
10 changed files with 435 additions and 292 deletions

View file

@ -1,65 +0,0 @@
import { WebSocketServer } from 'ws';
import { chromium } from 'playwright';
const PORT = 7702;
const wss = new WebSocketServer({ port: PORT });
console.log(`Browser WS server on ws://localhost:${PORT}`);
let browser = null;
// In-flight launch shared by overlapping callers so only one Chromium starts.
let browserLaunching = null;

/**
 * Returns the shared headless Chromium instance, launching it on first use.
 *
 * Calls that arrive while the first launch is still pending wait on that same
 * launch instead of starting a second browser process. If the launch fails,
 * the pending state is cleared so a later call can try again.
 *
 * @returns {Promise<object>} the value resolved by chromium.launch()
 */
async function getBrowser() {
  if (browser) return browser;
  if (!browserLaunching) {
    browserLaunching = chromium
      .launch({ headless: true, args: ['--no-sandbox', '--disable-setuid-sandbox'] })
      .then((launched) => {
        browser = launched;
        return launched;
      })
      .finally(() => {
        browserLaunching = null;
      });
  }
  return browserLaunching;
}
/**
 * Per-connection browser session for the panel WebSocket.
 *
 * Each client lazily gets its own Playwright context and page (created on the
 * first `navigate`). It drives the page with JSON messages (`navigate`,
 * `screenshot`, `click`, `scroll`, `type`, `back`, `fwd`, `reload`) and
 * receives `ready`, `loading`, `screenshot` and `error` messages in return.
 * The context is closed when the socket closes.
 */
wss.on('connection', async (ws) => {
  let context = null;
  let page = null;
  let closed = false;

  // Replies are dropped once the socket is no longer open, so a late reply
  // (e.g. an error raised while the client disconnects) cannot fail in turn.
  const send = (payload) => {
    if (ws.readyState === ws.OPEN) ws.send(JSON.stringify(payload));
  };
  const sendError = (err) => send({ type: 'error', msg: String(err) });

  // Captures the viewport as JPEG and sends it with the current URL.
  async function screenshot() {
    const current = page; // `page` may be cleared by 'close' while we await
    if (!current) return;
    try {
      const buf = await current.screenshot({ type: 'jpeg', quality: 70, fullPage: false });
      send({ type: 'screenshot', data: buf.toString('base64'), url: current.url() });
    } catch (e) {
      sendError(e);
    }
  }

  // Creates the context and page on demand. Context and page are checked
  // separately so a failed newPage() does not leave the session unusable.
  async function ensurePage() {
    if (!context) {
      const b = await getBrowser();
      const created = await b.newContext({ viewport: { width: 1280, height: 800 } });
      if (closed) {
        // The client left while the context was being created: release it.
        await created.close().catch(() => {});
        throw new Error('Connection closed');
      }
      context = created;
    }
    if (!page) page = await context.newPage();
    return page;
  }

  // Loads `url`, prefixing https:// unless it already has an http(s) scheme.
  async function navigate(url) {
    try {
      if (typeof url !== 'string' || url.trim() === '') throw new Error('No URL');
      // Test for the scheme itself: a bare prefix check on "http" would treat
      // hosts such as "httpbin.org" as already absolute.
      const target = /^https?:\/\//i.test(url) ? url : 'https://' + url;
      const current = await ensurePage();
      send({ type: 'loading' });
      await current.goto(target, { timeout: 30000, waitUntil: 'domcontentloaded' });
      await screenshot();
    } catch (e) {
      sendError(e);
    }
  }

  // Actions that only apply to an existing page; each is followed by a screenshot.
  const pageActions = new Map([
    ['click', (m) => page.mouse.click(m.x, m.y)],
    ['scroll', (m) => page.mouse.wheel(0, m.dy)],
    ['type', (m) => page.keyboard.type(m.text)],
    ['back', () => page.goBack()],
    ['fwd', () => page.goForward()],
    ['reload', () => page.reload()],
  ]);

  ws.on('message', async (msg) => {
    try {
      const m = JSON.parse(msg.toString());
      if (m.type === 'navigate') {
        await navigate(m.url);
        return;
      }
      if (m.type === 'screenshot') {
        await screenshot();
        return;
      }
      const action = pageActions.get(m.type);
      // Unknown types, and page actions before the first navigate, are ignored.
      if (!action || !page) return;
      await action(m);
      await screenshot();
    } catch (e) {
      sendError(e);
    }
  });

  ws.on('close', async () => {
    closed = true;
    const ctx = context;
    context = null;
    page = null;
    // Best-effort cleanup: the context may already be gone.
    if (ctx) await ctx.close().catch(() => {});
  });

  // Tell the panel the session is ready to accept commands.
  send({ type: 'ready' });
});

View file

@ -1,27 +0,0 @@
import { WebSocketServer } from 'ws';
import { spawn } from 'child_process';
const PORT = 7701;
const wss = new WebSocketServer({ port: PORT });
console.log(`Terminal WS server on ws://localhost:${PORT}`);
/**
 * Gives every WebSocket client its own bash process.
 *
 * Shell stdout/stderr chunks are forwarded as `{ type: 'data' }` messages,
 * `{ type: 'input' }` messages are written to the shell's stdin, and a single
 * `{ type: 'exit', code }` message is sent before the socket is closed.
 * Closing the socket kills the shell.
 */
wss.on('connection', (ws) => {
  const shell = spawn('/bin/bash', [], {
    env: { ...process.env, TERM: 'xterm-256color', COLORTERM: 'truecolor' },
    cwd: process.env.HOME || '/root',
  });

  // Best-effort send: the client may disconnect at any moment.
  const send = (payload) => {
    try {
      ws.send(JSON.stringify(payload));
    } catch {
      /* client is gone; nothing left to notify */
    }
  };
  // NOTE(review): 'binary' maps each byte to one character; presumably the
  // client decodes it again — confirm against the terminal panel.
  const forward = (chunk) => send({ type: 'data', data: chunk.toString('binary') });

  // Reports the end of the session exactly once, whichever event comes first.
  let finished = false;
  const finish = (code) => {
    if (finished) return;
    finished = true;
    send({ type: 'exit', code });
    try {
      ws.close();
    } catch {
      /* already closed */
    }
  };

  shell.stdout.on('data', forward);
  shell.stderr.on('data', forward);
  shell.on('close', (code) => finish(code));
  // Without an 'error' listener a failed spawn is an uncaught exception that
  // takes the whole server down.
  shell.on('error', (err) => {
    console.error('Terminal shell error:', err);
    send({ type: 'data', data: `\r\n[terminal] ${err.message}\r\n` });
    finish(null);
  });
  // Writes racing with shell exit surface as stream errors (e.g. EPIPE); the
  // shell's own 'close'/'error' handlers report the outcome to the client.
  shell.stdin.on('error', () => {});

  ws.on('message', (msg) => {
    try {
      const m = JSON.parse(msg.toString());
      if (m.type === 'input' && shell.stdin.writable) shell.stdin.write(m.data);
      // 'resize' is accepted but is a no-op: there is no pty to resize.
    } catch {
      /* malformed client message: ignore and keep the session alive */
    }
  });

  ws.on('close', () => {
    shell.kill();
  });
});

View file

@ -1,12 +1,13 @@
import http from 'http'; import http from 'http';
import { exec } from 'child_process'; import { exec } from 'child_process';
import { chromium } from 'playwright'; import { chromium } from 'playwright';
import { WebSocketServer, WebSocket } from 'ws';
import { spawn } from 'child_process';
import url from 'url';
const PORT = parseInt(process.env.TOOL_SERVER_PORT || '7700'); const PORT = parseInt(process.env.TOOL_SERVER_PORT || '7700');
let browser = null;
let context = null;
let page = null;
// ── CORS ──────────────────────────────────────────────────────
const cors = { const cors = {
'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Origin': '*',
'Access-Control-Allow-Methods': 'POST, GET, OPTIONS', 'Access-Control-Allow-Methods': 'POST, GET, OPTIONS',
@ -24,12 +25,19 @@ function parseBody(req) {
}); });
} }
// ── SHARED PLAYWRIGHT BROWSER ─────────────────────────────────
let browser = null;
let context = null;
let page = null;
const browserPanelClients = new Set(); // WS clients watching the browser
async function launchBrowser() { async function launchBrowser() {
if (!browser) { if (!browser) {
browser = await chromium.launch({ browser = await chromium.launch({
headless: true, headless: true,
args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage'] args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
}); });
console.log('[tool-server] Playwright browser launched');
} }
return browser; return browser;
} }
@ -50,6 +58,24 @@ async function snap() {
return { screenshot: buf.toString('base64'), url: p.url(), title: await p.title() }; return { screenshot: buf.toString('base64'), url: p.url(), title: await p.title() };
} }
/**
 * Sends the current page snapshot to every connected browser-panel client.
 *
 * Takes one snapshot via snap() and sends it as a `screenshot` message to each
 * client in `browserPanelClients` whose socket is open. If the snapshot fails,
 * an `error` message is sent instead. Does nothing when no client is connected.
 *
 * Callers invoke this without awaiting it, so it must never reject: a send
 * failure for one client is logged and does not affect the other clients.
 *
 * @returns {Promise<void>}
 */
async function broadcastScreenshot() {
  if (browserPanelClients.size === 0) return;

  let payload;
  try {
    const s = await snap();
    payload = JSON.stringify({ type: 'screenshot', data: s.screenshot, url: s.url, title: s.title });
  } catch (e) {
    payload = JSON.stringify({ type: 'error', msg: String(e) });
  }

  for (const ws of browserPanelClients) {
    if (ws.readyState !== WebSocket.OPEN) continue;
    try {
      ws.send(payload);
    } catch (e) {
      console.error('[tool-server] Failed to send to browser panel client:', e);
    }
  }
}
// ── BASH HANDLER ──────────────────────────────────────────────
async function handleBash(body) { async function handleBash(body) {
const { command, timeout = 30000 } = body; const { command, timeout = 30000 } = body;
if (!command) return { error: 'No command provided' }; if (!command) return { error: 'No command provided' };
@ -70,20 +96,25 @@ async function handleBash(body) {
}); });
} }
// ── BROWSER HTTP HANDLERS (used by LLM tool) ─────────────────
async function handleNavigate(body) { async function handleNavigate(body) {
const { url } = body; const { url: targetUrl } = body;
if (!url) return { error: 'No URL' }; if (!targetUrl) return { error: 'No URL' };
const p = await getPage(); const p = await getPage();
const target = url.startsWith('http') ? url : 'https://' + url; const target = targetUrl.startsWith('http') ? targetUrl : 'https://' + targetUrl;
await p.goto(target, { timeout: 30000, waitUntil: 'domcontentloaded' }); await p.goto(target, { timeout: 30000, waitUntil: 'domcontentloaded' });
return snap(); const result = await snap();
broadcastScreenshot(); // sync panel
return result;
} }
async function handleClick(body) { async function handleClick(body) {
const p = await getPage(); const p = await getPage();
await p.mouse.click(body.x || 0, body.y || 0); await p.mouse.click(body.x || 0, body.y || 0);
await p.waitForTimeout(500); await p.waitForTimeout(500);
return snap(); const result = await snap();
broadcastScreenshot();
return result;
} }
async function handleType(body) { async function handleType(body) {
@ -91,20 +122,42 @@ async function handleType(body) {
if (body.selector) await p.fill(body.selector, body.text || ''); if (body.selector) await p.fill(body.selector, body.text || '');
else await p.keyboard.type(body.text || ''); else await p.keyboard.type(body.text || '');
await p.waitForTimeout(300); await p.waitForTimeout(300);
return snap(); const result = await snap();
broadcastScreenshot();
return result;
} }
async function handleScroll(body) { async function handleScroll(body) {
const p = await getPage(); const p = await getPage();
await p.mouse.wheel(0, body.dy || 300); await p.mouse.wheel(0, body.dy || 300);
await p.waitForTimeout(300); await p.waitForTimeout(300);
return snap(); const result = await snap();
broadcastScreenshot();
return result;
} }
async function handleBack() { async function handleBack() {
const p = await getPage(); const p = await getPage();
await p.goBack({ timeout: 10000 }).catch(() => {}); await p.goBack({ timeout: 10000 }).catch(() => {});
return snap(); const result = await snap();
broadcastScreenshot();
return result;
}
/**
 * Moves the shared page one step forward in its history and returns a fresh
 * snapshot of it.
 *
 * @returns {Promise<object>} whatever snap() resolves to
 */
async function handleForward() {
  const p = await getPage();
  // Failures from goForward are deliberately ignored so the caller still
  // receives a snapshot of wherever the page ended up.
  await p.goForward({ timeout: 10000 }).catch(() => {});
  const result = await snap();
  // Panel sync is fire-and-forget, but its failure must not surface as an
  // unhandled promise rejection.
  broadcastScreenshot().catch((err) => {
    console.error('[tool-server] Browser panel sync failed:', err);
  });
  return result;
}
async function handleReload() {
const p = await getPage();
await p.reload({ timeout: 15000 }).catch(() => {});
const result = await snap();
broadcastScreenshot();
return result;
} }
async function handleText() { async function handleText() {
@ -117,9 +170,11 @@ async function handleEval(body) {
const p = await getPage(); const p = await getPage();
const result = await p.evaluate(body.script || 'null'); const result = await p.evaluate(body.script || 'null');
const ss = await snap(); const ss = await snap();
broadcastScreenshot();
return { ...ss, evalResult: String(result) }; return { ...ss, evalResult: String(result) };
} }
// ── HTTP ROUTES ───────────────────────────────────────────────
const routes = { const routes = {
'/api/bash': handleBash, '/api/bash': handleBash,
'/api/browser/navigate': handleNavigate, '/api/browser/navigate': handleNavigate,
@ -127,14 +182,17 @@ const routes = {
'/api/browser/type': handleType, '/api/browser/type': handleType,
'/api/browser/scroll': handleScroll, '/api/browser/scroll': handleScroll,
'/api/browser/back': handleBack, '/api/browser/back': handleBack,
'/api/browser/screenshot': () => snap(), '/api/browser/forward': handleForward,
'/api/browser/reload': handleReload,
'/api/browser/screenshot': () => { const r = snap(); broadcastScreenshot(); return r; },
'/api/browser/text': handleText, '/api/browser/text': handleText,
'/api/browser/eval': handleEval, '/api/browser/eval': handleEval,
}; };
http.createServer(async (req, res) => { // ── HTTP SERVER ───────────────────────────────────────────────
const server = http.createServer(async (req, res) => {
if (req.method === 'OPTIONS') { res.writeHead(204, cors); res.end(); return; } if (req.method === 'OPTIONS') { res.writeHead(204, cors); res.end(); return; }
if (req.url === '/health') { res.writeHead(200, cors); res.end(JSON.stringify({ ok: true })); return; } if (req.url === '/health') { res.writeHead(200, cors); res.end(JSON.stringify({ ok: true, browser: !!browser })); return; }
const handler = routes[req.url]; const handler = routes[req.url];
if (req.method === 'POST' && handler) { if (req.method === 'POST' && handler) {
try { try {
@ -150,6 +208,113 @@ http.createServer(async (req, res) => {
} }
res.writeHead(404, cors); res.writeHead(404, cors);
res.end(JSON.stringify({ error: 'Not found' })); res.end(JSON.stringify({ error: 'Not found' }));
}).listen(PORT, () => { });
// Interpolate the PORT constant itself; quoting it would print the literal text "PORT".
console.log(`[tool-server] listening on :${PORT}`);
// ── WEBSOCKET: TERMINAL (/ws/terminal) ────────────────────────
/**
 * WebSocket endpoint backing the terminal panel.
 *
 * Every client gets its own bash process. Shell stdout/stderr chunks are
 * forwarded as `{ type: 'data' }` messages, `{ type: 'input' }` messages are
 * written to the shell's stdin, and a single `{ type: 'exit', code }` message
 * is sent before the socket is closed. Closing the socket kills the shell.
 * Created with `noServer: true`, so connections are handed over by the HTTP
 * server's upgrade handling.
 */
const terminalWss = new WebSocketServer({ noServer: true });
terminalWss.on('connection', (ws) => {
  console.log('[tool-server] Terminal WS client connected');
  const shell = spawn('/bin/bash', [], {
    env: { ...process.env, TERM: 'xterm-256color', COLORTERM: 'truecolor' },
    cwd: process.env.HOME || '/root',
  });

  // Best-effort send: the client may disconnect at any moment.
  const send = (payload) => {
    try {
      ws.send(JSON.stringify(payload));
    } catch {
      /* client is gone; nothing left to notify */
    }
  };
  // NOTE(review): 'binary' maps each byte to one character; presumably the
  // client decodes it again — confirm against terminal-panel.ts.
  const forward = (chunk) => send({ type: 'data', data: chunk.toString('binary') });

  // Reports the end of the session exactly once, whichever event comes first.
  let finished = false;
  const finish = (code) => {
    if (finished) return;
    finished = true;
    send({ type: 'exit', code });
    try {
      ws.close();
    } catch {
      /* already closed */
    }
  };

  shell.stdout.on('data', forward);
  shell.stderr.on('data', forward);
  shell.on('close', (code) => finish(code));
  // Without an 'error' listener a failed spawn is an uncaught exception, which
  // would take down the HTTP API and the browser panel along with the terminal.
  shell.on('error', (err) => {
    console.error('[tool-server] Terminal shell error:', err);
    send({ type: 'data', data: `\r\n[terminal] ${err.message}\r\n` });
    finish(null);
  });
  // Writes racing with shell exit surface as stream errors (e.g. EPIPE); the
  // shell's own 'close'/'error' handlers report the outcome to the client.
  shell.stdin.on('error', () => {});

  ws.on('message', (msg) => {
    try {
      const m = JSON.parse(msg.toString());
      if (m.type === 'input' && shell.stdin.writable) shell.stdin.write(m.data);
      // 'resize' is accepted but is a no-op: best effort without node-pty.
    } catch {
      /* malformed client message: ignore and keep the session alive */
    }
  });

  ws.on('close', () => {
    shell.kill();
  });
});
// ── WEBSOCKET: BROWSER PANEL (/ws/browser) ────────────────────
const browserWss = new WebSocketServer({ noServer: true });

/**
 * Turns a URL typed in the panel into a navigable one.
 * Prefixes https:// unless the value already has an http(s) scheme. The scheme
 * itself is tested: a bare prefix check on "http" would treat hosts such as
 * "httpbin.org" as already absolute.
 */
function normalizePanelUrl(raw) {
  if (typeof raw !== 'string' || raw.trim() === '') throw new Error('No URL');
  const trimmed = raw.trim();
  return /^https?:\/\//i.test(trimmed) ? trimmed : 'https://' + trimmed;
}

// Panel message type → interaction with the shared page. Every entry is
// followed by a screenshot broadcast to all panel clients.
const panelActions = new Map([
  ['click', async (p, m) => {
    await p.mouse.click(m.x || 0, m.y || 0);
    await p.waitForTimeout(500);
  }],
  ['scroll', async (p, m) => {
    await p.mouse.wheel(0, m.dy || 300);
    await p.waitForTimeout(300);
  }],
  ['type', async (p, m) => {
    await p.keyboard.type(m.text || '');
    await p.waitForTimeout(300);
  }],
  // History/reload failures are deliberately ignored; the broadcast that
  // follows shows the panel wherever the page ended up.
  ['back', (p) => p.goBack({ timeout: 10000 }).catch(() => {})],
  ['fwd', (p) => p.goForward({ timeout: 10000 }).catch(() => {})],
  ['reload', (p) => p.reload({ timeout: 15000 }).catch(() => {})],
]);

/**
 * Browser panel connection: registers the client for screenshot broadcasts and
 * applies its interactions (navigate, click, scroll, type, back, fwd, reload,
 * screenshot) to the page returned by getPage().
 */
browserWss.on('connection', async (ws) => {
  console.log('[tool-server] Browser panel WS client connected');
  browserPanelClients.add(ws);

  ws.on('close', () => browserPanelClients.delete(ws));
  ws.on('error', () => browserPanelClients.delete(ws));

  // Direct replies to this client are dropped once its socket is not open, so
  // reporting an error to a departing client cannot fail in turn.
  const send = (payload) => {
    if (ws.readyState !== WebSocket.OPEN) return;
    try {
      ws.send(JSON.stringify(payload));
    } catch (e) {
      console.error('[tool-server] Failed to send to browser panel client:', e);
    }
  };

  // Handle panel user interactions (navigate, click, scroll, etc.)
  ws.on('message', async (msg) => {
    try {
      const m = JSON.parse(msg.toString());
      if (m.type === 'navigate') {
        const target = normalizePanelUrl(m.url);
        const p = await getPage();
        send({ type: 'loading' });
        await p.goto(target, { timeout: 30000, waitUntil: 'domcontentloaded' });
        await broadcastScreenshot();
        return;
      }
      if (m.type === 'screenshot') {
        await broadcastScreenshot();
        return;
      }
      const action = panelActions.get(m.type);
      if (!action) return; // unknown message types are ignored
      await action(await getPage(), m);
      await broadcastScreenshot();
    } catch (e) {
      send({ type: 'error', msg: String(e) });
    }
  });

  // Send initial ready + screenshot if a page already exists
  send({ type: 'ready' });
  if (page && !page.isClosed()) {
    try {
      await broadcastScreenshot();
    } catch (e) {
      console.error('[tool-server] Initial browser panel screenshot failed:', e);
    }
  }
});
// ── UPGRADE HANDLER (route WS by path) ────────────────────────
// Request path → WebSocket server that should take over the connection.
const upgradeTargets = new Map([
  ['/ws/terminal', terminalWss],
  ['/ws/browser', browserWss],
]);

/**
 * Hands an HTTP upgrade request to the WebSocket server registered for its
 * path. The socket is destroyed when the path is unknown or the request target
 * cannot be parsed.
 */
server.on('upgrade', (req, socket, head) => {
  let pathname;
  try {
    // req.url is normally only a path plus query string, so a placeholder
    // base is supplied for the WHATWG parser (which replaces legacy url.parse).
    pathname = new URL(req.url, 'http://localhost').pathname;
  } catch {
    socket.destroy();
    return;
  }

  const target = upgradeTargets.get(pathname);
  if (!target) {
    socket.destroy();
    return;
  }
  target.handleUpgrade(req, socket, head, (ws) => target.emit('connection', ws, req));
});
// ── START ─────────────────────────────────────────────────────
server.listen(PORT, () => {
console.log(`[tool-server] Unified server on http://localhost:${PORT}`);
console.log(`[tool-server] HTTP API: POST /api/bash, /api/browser/*`);
console.log(`[tool-server] Terminal WS: ws://localhost:${PORT}/ws/terminal`);
console.log(`[tool-server] Browser WS: ws://localhost:${PORT}/ws/browser`);
console.log(`[tool-server] Health: GET /health`);
}); });

View file

@ -32,7 +32,7 @@ export class JaeBrowserPanel extends LitElement {
} }
connect() { connect() {
this.ws = new WebSocket("ws://localhost:7702"); this.ws = new WebSocket("ws://localhost:7700/ws/browser");
this.ws.onopen = () => { this.ws.onopen = () => {
this.connected = true; this.connected = true;
this.requestUpdate(); this.requestUpdate();

View file

@ -68,7 +68,7 @@ export class JaeTerminalPanel extends LitElement {
this.term.open(this.container); this.term.open(this.container);
this.fitAddon.fit(); this.fitAddon.fit();
this.ws = new WebSocket("ws://localhost:7701"); this.ws = new WebSocket("ws://localhost:7700/ws/terminal");
this.ws.onopen = () => { this.ws.onopen = () => {
this.connected = true; this.connected = true;
this.connecting = false; this.connecting = false;

View file

@ -315,21 +315,69 @@ const createAgent = async (initialState?: Partial<AgentState>) => {
if (agentUnsubscribe) agentUnsubscribe(); if (agentUnsubscribe) agentUnsubscribe();
agent = new Agent({ agent = new Agent({
initialState: initialState || { initialState: initialState || {
systemPrompt: `You are JAE, a helpful AI coding assistant. Respond in plain text by default. systemPrompt: `You are JAE, an autonomous AI coding assistant with tool access. You solve tasks methodically using available tools and report results clearly.
CRITICAL: Do NOT call any tools unless the user EXPLICITLY asks for something that requires a tool. Greetings, questions, casual chat, and general conversation require ZERO tools just reply with plain text. Never create artifacts, files, or run code unless specifically requested. ## Your Role
- Expert coding assistant specialising in software development, debugging, research, and creative tasks
- You have direct access to a Linux terminal, a web browser, web search, image generation, a JavaScript sandbox, text-to-speech, and persistent memory
- You execute tasks yourself never instruct the user to do something you can do with your tools
- Be conversational for casual chat (no tools needed), but switch to methodical tool use when given a real task
Available tools (use ONLY when explicitly requested): ## Communication Rules
- bash: Run shell commands - For greetings, questions, explanations, or casual chat: respond in plain text only, NO tool calls
- browser: Browse the web - For tasks requiring action: think step-by-step, use tools, report results
- web_search: Search the internet - Use markdown formatting for structured responses (headers, code blocks, lists, tables)
- image_gen: Generate images - Be direct and concise avoid filler phrases
- javascript_repl: Run JS code or create HTML artifacts
- tts: Text to speech
- memory: Save/recall information
DO NOT use tools for: "hi", "hello", "how are you", "what is X", "explain Y", "testing", "test", or any general question. ## Problem-Solving Methodology
ONLY use tools for: "search for X", "run this code", "create a webpage", "generate an image of X", "open google.com", etc.`, When given a task that requires tools:
1. **Analyse**: Break down what needs to be done
2. **Plan**: Outline your approach in 2-3 sentences
3. **Execute**: Use the appropriate tools step by step
4. **Verify**: Check the output and confirm success
5. **Report**: Present results clearly to the user
Do not give up easily. If a tool fails, try an alternative approach. Be resourceful.
## Available Tools
### bash
Execute shell commands on the host system. Use for: file operations, installing packages, running scripts, system tasks, git commands.
- Always check command output for errors
- Use for quick tasks like 'ls', 'cat', 'grep', 'curl', etc.
### browser
Control a headless Chromium browser. Actions: navigate (url), click (x,y), type (text), scroll (dy), back, screenshot, text (extract page text), eval (run JS in page).
- Navigate first, then interact
- Use 'text' action to read page content for the LLM
- The user can see the browser live in the right panel
### web_search
Search the internet via DuckDuckGo. Use for: finding current information, researching topics, looking up documentation.
- Search first, then use browser to visit promising results if needed
### javascript_repl
Run JavaScript code in a sandboxed environment. Use for: calculations, data processing, creating HTML/SVG artifacts that render inline.
- Return HTML strings to create visual artifacts
- Great for charts, diagrams, interactive demos
### image_gen
Generate images using AI. Use when the user asks to create, draw, or generate an image.
### tts
Convert text to speech audio. Use when asked to read aloud or generate audio.
### memory_save / memory_query / memory_delete
Persist information across sessions. Use to save important context, user preferences, or project details.
- Save key facts the user tells you about themselves or their projects
- Query memory when context from past conversations might be relevant
## Critical Rules
- **NO tools for casual chat**: "hi", "how are you", "what is X", "explain Y" plain text response
- **YES tools for action requests**: "search for X", "run this code", "open google.com", "create a webpage", "find out about..." use tools
- Never create files or artifacts unless the user asks for them
- Never run code unless the user asks you to
- If unsure whether to use a tool, respond with text and ask for clarification`,
model: getModel("venice", "llama-3.3-70b"), model: getModel("venice", "llama-3.3-70b"),
thinkingLevel: "off", thinkingLevel: "off",
messages: [], messages: [],

View file

@ -101,6 +101,8 @@ export { ImageArtifact } from "./tools/artifacts/ImageArtifact.js";
export { MarkdownArtifact } from "./tools/artifacts/MarkdownArtifact.js"; export { MarkdownArtifact } from "./tools/artifacts/MarkdownArtifact.js";
export { SvgArtifact } from "./tools/artifacts/SvgArtifact.js"; export { SvgArtifact } from "./tools/artifacts/SvgArtifact.js";
export { TextArtifact } from "./tools/artifacts/TextArtifact.js"; export { TextArtifact } from "./tools/artifacts/TextArtifact.js";
export { bashTool, createBashTool } from "./tools/bash-tool.js";
export { browserTool, createBrowserTool } from "./tools/browser-tool.js";
export { createExtractDocumentTool, extractDocumentTool } from "./tools/extract-document.js"; export { createExtractDocumentTool, extractDocumentTool } from "./tools/extract-document.js";
export { createImageGenTool, imageGenTool } from "./tools/image-gen.js"; export { createImageGenTool, imageGenTool } from "./tools/image-gen.js";
// Tools // Tools
@ -119,8 +121,6 @@ export type { ToolRenderer, ToolRenderResult } from "./tools/types.js";
export { createTTSTool, ttsTool } from "./tools/voice-tts.js"; export { createTTSTool, ttsTool } from "./tools/voice-tts.js";
// Venice / community tools // Venice / community tools
export { createWebSearchTool, webSearchTool } from "./tools/web-search.js"; export { createWebSearchTool, webSearchTool } from "./tools/web-search.js";
export { createBashTool, bashTool } from "./tools/bash-tool.js";
export { createBrowserTool, browserTool } from "./tools/browser-tool.js";
export type { Attachment } from "./utils/attachment-utils.js"; export type { Attachment } from "./utils/attachment-utils.js";
// Utils // Utils
export { loadAttachment } from "./utils/attachment-utils.js"; export { loadAttachment } from "./utils/attachment-utils.js";

View file

@ -6,84 +6,86 @@ import { Terminal } from "lucide";
import { registerToolRenderer, renderHeader } from "./renderer-registry.js"; import { registerToolRenderer, renderHeader } from "./renderer-registry.js";
import type { ToolRenderer, ToolRenderResult } from "./types.js"; import type { ToolRenderer, ToolRenderResult } from "./types.js";
const TOOL_SERVER = typeof window !== "undefined" const TOOL_SERVER =
? (window as any).__JAE_TOOL_SERVER__ || "http://localhost:7700" typeof window !== "undefined"
: "http://localhost:7700"; ? (window as any).__JAE_TOOL_SERVER__ || "http://localhost:7700"
: "http://localhost:7700";
const bashSchema = Type.Object({ const bashSchema = Type.Object({
command: Type.String({ description: "Shell command to execute" }), command: Type.String({ description: "Shell command to execute" }),
timeout: Type.Optional(Type.Number({ description: "Timeout in ms (default: 30000)" })), timeout: Type.Optional(Type.Number({ description: "Timeout in ms (default: 30000)" })),
}); });
export interface BashDetails { export interface BashDetails {
stdout: string; stdout: string;
stderr: string; stderr: string;
exitCode: number; exitCode: number;
command: string; command: string;
} }
export const bashTool: AgentTool<typeof bashSchema, BashDetails> = { export const bashTool: AgentTool<typeof bashSchema, BashDetails> = {
name: "bash", name: "bash",
label: "Terminal", label: "Terminal",
description: "Execute a shell command on the server. Use for file operations, installing packages, running scripts, git commands, etc.", description:
parameters: bashSchema, "Execute a shell command on the server. Use for file operations, installing packages, running scripts, git commands, etc.",
async execute(toolCallId, params, signal) { parameters: bashSchema,
const { command, timeout = 30000 } = params; async execute(toolCallId, params, signal) {
try { const { command, timeout = 30000 } = params;
const res = await fetch(TOOL_SERVER + "/api/bash", { try {
method: "POST", const res = await fetch(TOOL_SERVER + "/api/bash", {
headers: { "Content-Type": "application/json" }, method: "POST",
body: JSON.stringify({ command, timeout }), headers: { "Content-Type": "application/json" },
signal, body: JSON.stringify({ command, timeout }),
}); signal,
const data = await res.json() as any; });
if (data.error) { const data = (await res.json()) as any;
return { if (data.error) {
content: [{ type: "text" as const, text: "Error: " + data.error }], return {
details: { stdout: "", stderr: data.error, exitCode: 1, command }, content: [{ type: "text" as const, text: "Error: " + data.error }],
}; details: { stdout: "", stderr: data.error, exitCode: 1, command },
} };
const output = data.output || data.stdout || ""; }
return { const output = data.output || data.stdout || "";
content: [{ type: "text" as const, text: output.slice(0, 10000) || "(no output)" }], return {
details: { stdout: data.stdout, stderr: data.stderr, exitCode: data.exitCode, command }, content: [{ type: "text" as const, text: output.slice(0, 10000) || "(no output)" }],
}; details: { stdout: data.stdout, stderr: data.stderr, exitCode: data.exitCode, command },
} catch (err: any) { };
return { } catch (err: any) {
content: [{ type: "text" as const, text: "Bash tool error: " + err.message }], return {
details: { stdout: "", stderr: err.message, exitCode: 1, command }, content: [{ type: "text" as const, text: "Bash tool error: " + err.message }],
}; details: { stdout: "", stderr: err.message, exitCode: 1, command },
} };
}, }
},
}; };
class BashToolRenderer implements ToolRenderer<{ command: string }, BashDetails> { class BashToolRenderer implements ToolRenderer<{ command: string }, BashDetails> {
render( render(
params: { command: string } | undefined, params: { command: string } | undefined,
result: ToolResultMessage<BashDetails> | undefined, result: ToolResultMessage<BashDetails> | undefined,
): ToolRenderResult { ): ToolRenderResult {
const state = result ? (result.isError ? "error" : "complete") : "inprogress"; const state = result ? (result.isError ? "error" : "complete") : "inprogress";
const cmd = result?.details?.command || params?.command || "..."; const cmd = result?.details?.command || params?.command || "...";
const exitCode = result?.details?.exitCode; const exitCode = result?.details?.exitCode;
const icon = state === "error" ? "text-red-500" : exitCode === 0 ? "text-green-500" : ""; const icon = state === "error" ? "text-red-500" : exitCode === 0 ? "text-green-500" : "";
if (result?.details) { if (result?.details) {
const d = result.details; const d = result.details;
return { return {
content: html` content: html`
<div class="flex flex-col gap-2"> <div class="flex flex-col gap-2">
${renderHeader(state, Terminal, "$ " + cmd)} ${renderHeader(state, Terminal, "$ " + cmd)}
<pre class="text-xs bg-black text-green-400 p-3 rounded overflow-auto max-h-64 whitespace-pre-wrap font-mono">${d.stdout}${d.stderr ? "\nSTDERR: " + d.stderr : ""}</pre> <pre class="text-xs bg-black text-green-400 p-3 rounded overflow-auto max-h-64 whitespace-pre-wrap font-mono">${d.stdout}${d.stderr ? "\nSTDERR: " + d.stderr : ""}</pre>
<span class="text-xs ${icon}">Exit code: ${d.exitCode}</span> <span class="text-xs ${icon}">Exit code: ${d.exitCode}</span>
</div>`, </div>`,
isCustom: false, isCustom: false,
}; };
} }
return { content: renderHeader(state, Terminal, "$ " + cmd), isCustom: false }; return { content: renderHeader(state, Terminal, "$ " + cmd), isCustom: false };
} }
} }
registerToolRenderer("bash", new BashToolRenderer()); registerToolRenderer("bash", new BashToolRenderer());
export function createBashTool(): AgentTool<typeof bashSchema, BashDetails> { export function createBashTool(): AgentTool<typeof bashSchema, BashDetails> {
return bashTool; return bashTool;
} }

View file

@ -6,111 +6,132 @@ import { Globe } from "lucide";
import { registerToolRenderer, renderHeader } from "./renderer-registry.js"; import { registerToolRenderer, renderHeader } from "./renderer-registry.js";
import type { ToolRenderer, ToolRenderResult } from "./types.js"; import type { ToolRenderer, ToolRenderResult } from "./types.js";
const TOOL_SERVER = typeof window !== "undefined" const TOOL_SERVER =
? (window as any).__JAE_TOOL_SERVER__ || "http://localhost:7700" typeof window !== "undefined"
: "http://localhost:7700"; ? (window as any).__JAE_TOOL_SERVER__ || "http://localhost:7700"
: "http://localhost:7700";
const browserSchema = Type.Object({ const browserSchema = Type.Object({
action: Type.Union([ action: Type.Union(
Type.Literal("navigate"), [
Type.Literal("click"), Type.Literal("navigate"),
Type.Literal("type"), Type.Literal("click"),
Type.Literal("scroll"), Type.Literal("type"),
Type.Literal("back"), Type.Literal("scroll"),
Type.Literal("screenshot"), Type.Literal("back"),
Type.Literal("text"), Type.Literal("screenshot"),
Type.Literal("eval"), Type.Literal("text"),
], { description: "Browser action to perform" }), Type.Literal("eval"),
url: Type.Optional(Type.String({ description: "URL to navigate to" })), ],
x: Type.Optional(Type.Number({ description: "Click X coordinate" })), { description: "Browser action to perform" },
y: Type.Optional(Type.Number({ description: "Click Y coordinate" })), ),
text: Type.Optional(Type.String({ description: "Text to type" })), url: Type.Optional(Type.String({ description: "URL to navigate to" })),
selector: Type.Optional(Type.String({ description: "CSS selector to type into" })), x: Type.Optional(Type.Number({ description: "Click X coordinate" })),
dy: Type.Optional(Type.Number({ description: "Scroll delta Y pixels" })), y: Type.Optional(Type.Number({ description: "Click Y coordinate" })),
script: Type.Optional(Type.String({ description: "JavaScript to evaluate in page" })), text: Type.Optional(Type.String({ description: "Text to type" })),
selector: Type.Optional(Type.String({ description: "CSS selector to type into" })),
dy: Type.Optional(Type.Number({ description: "Scroll delta Y pixels" })),
script: Type.Optional(Type.String({ description: "JavaScript to evaluate in page" })),
}); });
export interface BrowserDetails { export interface BrowserDetails {
action: string; action: string;
url?: string; url?: string;
title?: string; title?: string;
screenshot?: string; screenshot?: string;
text?: string; text?: string;
evalResult?: string; evalResult?: string;
error?: string; error?: string;
} }
/**
 * Agent tool that controls a headless browser through the tool server's HTTP API.
 *
 * Each supported action is POSTed as JSON to `TOOL_SERVER + "/api/browser/<action>"`,
 * with every parameter except `action` as the request body.
 *
 * `execute` never rejects: unknown actions, server-reported errors, HTTP failures,
 * unparsable responses and network/abort errors are all returned as a text content
 * item with `details.error` set.
 */
export const browserTool: AgentTool<typeof browserSchema, BrowserDetails> = {
	name: "browser",
	label: "Browser",
	description:
		"Control a headless browser. Actions: navigate (url), click (x,y), type (text, optional selector), scroll (dy), back, screenshot, text (get page text), eval (run JS).",
	parameters: browserSchema,
	/**
	 * Run one browser action.
	 *
	 * @param toolCallId Identifier of the tool call (unused here).
	 * @param params Validated parameters; `action` selects the endpoint, the rest form the body.
	 * @param signal Abort signal forwarded to `fetch`.
	 * @returns Text content (plus an image item when a screenshot is returned) and `BrowserDetails`.
	 */
	async execute(toolCallId, params, signal) {
		const { action, ...rest } = params;

		// Every endpoint is "/api/browser/<action>", so a whitelist is enough.
		const knownActions: readonly string[] = [
			"navigate",
			"click",
			"type",
			"scroll",
			"back",
			"screenshot",
			"text",
			"eval",
		];
		if (!knownActions.includes(action)) {
			return {
				content: [{ type: "text" as const, text: "Unknown action: " + action }],
				details: { action, error: "Unknown action" },
			};
		}
		const endpoint = "/api/browser/" + action;

		try {
			const res = await fetch(TOOL_SERVER + endpoint, {
				method: "POST",
				headers: { "Content-Type": "application/json" },
				body: JSON.stringify(rest),
				signal,
			});

			// A non-JSON body (e.g. an HTML error page) must not surface as a bare parse error.
			let data: any = null;
			try {
				data = await res.json();
			} catch {
				data = null;
			}

			if (data === null || typeof data !== "object" || data.error || !res.ok) {
				const message = String(
					data?.error ?? (res.ok ? "Invalid response from tool server" : "HTTP " + res.status + " " + res.statusText),
				);
				return {
					content: [{ type: "text" as const, text: "Browser error: " + message }],
					details: { action, error: message },
				};
			}

			// Build text response for LLM
			const textParts: string[] = [];
			if (data.url) textParts.push("URL: " + data.url);
			if (data.title) textParts.push("Title: " + data.title);
			if (data.text) textParts.push("Page text:\n" + data.text);
			// Null check rather than truthiness so falsy results (0, false, "") are still reported.
			if (data.evalResult !== undefined && data.evalResult !== null) {
				textParts.push("Eval result: " + data.evalResult);
			}
			if (data.screenshot) textParts.push("[Screenshot captured]");
			if (textParts.length === 0) textParts.push("Action completed.");

			// Include screenshot as image content if available
			const content: any[] = [{ type: "text" as const, text: textParts.join("\n") }];
			if (data.screenshot) {
				content.push({ type: "image" as const, mimeType: "image/jpeg", data: data.screenshot });
			}

			return {
				content,
				details: {
					action,
					url: data.url,
					title: data.title,
					screenshot: data.screenshot,
					text: data.text,
					evalResult: data.evalResult,
				},
			};
		} catch (err: any) {
			// Thrown values are not guaranteed to be Error instances.
			const message = err instanceof Error ? err.message : String(err);
			return {
				content: [{ type: "text" as const, text: "Browser tool error: " + message }],
				details: { action, error: message },
			};
		}
	},
};
class BrowserToolRenderer implements ToolRenderer<any, BrowserDetails> { class BrowserToolRenderer implements ToolRenderer<any, BrowserDetails> {
render(params: any | undefined, result: ToolResultMessage<BrowserDetails> | undefined): ToolRenderResult { render(params: any | undefined, result: ToolResultMessage<BrowserDetails> | undefined): ToolRenderResult {
const state = result ? (result.isError ? "error" : "complete") : "inprogress"; const state = result ? (result.isError ? "error" : "complete") : "inprogress";
const action = result?.details?.action || params?.action || "..."; const action = result?.details?.action || params?.action || "...";
const url = result?.details?.url || params?.url || ""; const url = result?.details?.url || params?.url || "";
const label = url ? action + ": " + url : action; const label = url ? action + ": " + url : action;
if (result?.details?.screenshot) { if (result?.details?.screenshot) {
return { return {
content: html` content: html`
<div class="flex flex-col gap-2"> <div class="flex flex-col gap-2">
${renderHeader(state, Globe, label)} ${renderHeader(state, Globe, label)}
<img src="data:image/jpeg;base64,${result.details.screenshot}" <img src="data:image/jpeg;base64,${result.details.screenshot}"
@ -118,25 +139,25 @@ class BrowserToolRenderer implements ToolRenderer<any, BrowserDetails> {
alt="Browser screenshot" /> alt="Browser screenshot" />
${result.details.title ? html`<span class="text-xs text-muted-foreground">${result.details.title}</span>` : html``} ${result.details.title ? html`<span class="text-xs text-muted-foreground">${result.details.title}</span>` : html``}
</div>`, </div>`,
isCustom: false, isCustom: false,
}; };
} }
if (result?.details?.text) { if (result?.details?.text) {
return { return {
content: html` content: html`
<div class="flex flex-col gap-2"> <div class="flex flex-col gap-2">
${renderHeader(state, Globe, label)} ${renderHeader(state, Globe, label)}
<pre class="text-xs p-3 rounded border border-border overflow-auto max-h-48 whitespace-pre-wrap">${result.details.text}</pre> <pre class="text-xs p-3 rounded border border-border overflow-auto max-h-48 whitespace-pre-wrap">${result.details.text}</pre>
</div>`, </div>`,
isCustom: false, isCustom: false,
}; };
} }
return { content: renderHeader(state, Globe, label), isCustom: false }; return { content: renderHeader(state, Globe, label), isCustom: false };
} }
} }
// Register the renderer under the same name the tool uses ("browser").
registerToolRenderer("browser", new BrowserToolRenderer());
/**
 * Factory for the browser tool.
 *
 * @returns The module-level `browserTool` object; every call returns the same instance.
 */
export function createBrowserTool(): AgentTool<typeof browserSchema, BrowserDetails> {
	return browserTool;
}

View file

@ -48,10 +48,9 @@ export function renderTool(
export { getToolRenderer, registerToolRenderer }; export { getToolRenderer, registerToolRenderer };
export { type BashDetails, bashTool, createBashTool } from "./bash-tool.js";
export { type BrowserDetails, browserTool, createBrowserTool } from "./browser-tool.js";
export { createImageGenTool, type ImageGenDetails, imageGenTool } from "./image-gen.js"; export { createImageGenTool, type ImageGenDetails, imageGenTool } from "./image-gen.js";
export { createMemoryTools, recallMemoryTool, saveMemoryTool } from "./memory-tool.js"; export { createMemoryTools, recallMemoryTool, saveMemoryTool } from "./memory-tool.js";
export { createTTSTool, type TTSDetails, ttsTool } from "./voice-tts.js"; export { createTTSTool, type TTSDetails, ttsTool } from "./voice-tts.js";
export { createWebSearchTool, type WebSearchDetails, type WebSearchResult, webSearchTool } from "./web-search.js"; export { createWebSearchTool, type WebSearchDetails, type WebSearchResult, webSearchTool } from "./web-search.js";
export { createBashTool, type BashDetails, bashTool } from "./bash-tool.js";
export { createBrowserTool, type BrowserDetails, browserTool } from "./browser-tool.js";