From fedc60fd0fecc39cc8304979aae50f632c35ee0c Mon Sep 17 00:00:00 2001 From: JAE Date: Fri, 27 Mar 2026 04:13:17 +0000 Subject: [PATCH] feat: unified tool-server + Agent Zero-inspired system prompt - Merge 3 servers into single tool-server.mjs on port 7700 - HTTP API: POST /api/bash, /api/browser/* - WebSocket: /ws/terminal (xterm.js panel) - WebSocket: /ws/browser (live browser panel) - SHARED Playwright instance between LLM browser tool and user panel - When AI navigates a page, user sees it live in browser panel - When user clicks in panel, AI tools see the same page state - Remove standalone terminal-server.mjs (was :7701) - Remove standalone browser-server.mjs (was :7702) - Update browser-panel.ts: ws://localhost:7700/ws/browser - Update terminal-panel.ts: ws://localhost:7700/ws/terminal - Agent Zero-inspired system prompt with: - Structured problem-solving methodology (analyse/plan/execute/verify/report) - Clear tool usage rules (no tools for casual chat) - Detailed tool descriptions with usage guidance - Resourceful retry behaviour on failures - npm run dev starts both vite + unified server via concurrently --- .../web-ui/example/server/browser-server.mjs | 65 ----- .../web-ui/example/server/terminal-server.mjs | 27 -- .../web-ui/example/server/tool-server.mjs | 197 +++++++++++++-- .../example/src/components/browser-panel.ts | 2 +- .../example/src/components/terminal-panel.ts | 2 +- packages/web-ui/example/src/main.ts | 72 +++++- packages/web-ui/src/index.ts | 4 +- packages/web-ui/src/tools/bash-tool.ts | 120 ++++----- packages/web-ui/src/tools/browser-tool.ts | 233 ++++++++++-------- packages/web-ui/src/tools/index.ts | 5 +- 10 files changed, 435 insertions(+), 292 deletions(-) delete mode 100644 packages/web-ui/example/server/browser-server.mjs delete mode 100644 packages/web-ui/example/server/terminal-server.mjs diff --git a/packages/web-ui/example/server/browser-server.mjs b/packages/web-ui/example/server/browser-server.mjs deleted file mode 100644 index 5d96f98..0000000 --- a/packages/web-ui/example/server/browser-server.mjs +++ /dev/null @@ -1,65 +0,0 @@ -import { WebSocketServer } from 'ws'; -import { chromium } from 'playwright'; - -const PORT = 7702; -const wss = new WebSocketServer({ port: PORT }); -console.log(`Browser WS server on ws://localhost:${PORT}`); - -let browser = null; - -async function getBrowser() { - if (!browser) browser = await chromium.launch({ headless: true, args: ['--no-sandbox','--disable-setuid-sandbox'] }); - return browser; -} - -wss.on('connection', async (ws) => { - let context = null; - let page = null; - - async function screenshot() { - if (!page) return; - try { - const buf = await page.screenshot({ type: 'jpeg', quality: 70, fullPage: false }); - ws.send(JSON.stringify({ type: 'screenshot', data: buf.toString('base64'), url: page.url() })); - } catch(e) { ws.send(JSON.stringify({ type: 'error', msg: String(e) })); } - } - - async function navigate(url) { - try { - if (!context) { - const b = await getBrowser(); - context = await b.newContext({ viewport: { width: 1280, height: 800 } }); - page = await context.newPage(); - } - if (!url.startsWith('http')) url = 'https://' + url; - ws.send(JSON.stringify({ type: 'loading' })); - await page.goto(url, { timeout: 30000, waitUntil: 'domcontentloaded' }); - await screenshot(); - } catch(e) { ws.send(JSON.stringify({ type: 'error', msg: String(e) })); } - } - - ws.on('message', async (msg) => { - try { - const m = JSON.parse(msg.toString()); - if (m.type === 'navigate') await navigate(m.url); - if (m.type === 'screenshot') await screenshot(); - if (m.type === 'click') { - if (page) { await page.mouse.click(m.x, m.y); await screenshot(); } - } - if (m.type === 'scroll') { - if (page) { await page.mouse.wheel(0, m.dy); await screenshot(); } - } - if (m.type === 'type') { - if (page) { await page.keyboard.type(m.text); await screenshot(); } - } - if (m.type === 'back') { if (page) { await page.goBack(); await screenshot(); } } - if (m.type === 'fwd') { if (page) { await page.goForward(); await screenshot(); } } - if (m.type === 'reload'){ if (page) { await page.reload(); await screenshot(); } } - } catch(e) { ws.send(JSON.stringify({ type: 'error', msg: String(e) })); } - }); - - ws.on('close', async () => { if (context) await context.close().catch(()=>{}); context = null; page = null; }); - - // Send welcome screenshot placeholder - ws.send(JSON.stringify({ type: 'ready' })); -}); diff --git a/packages/web-ui/example/server/terminal-server.mjs b/packages/web-ui/example/server/terminal-server.mjs deleted file mode 100644 index 95dda1e..0000000 --- a/packages/web-ui/example/server/terminal-server.mjs +++ /dev/null @@ -1,27 +0,0 @@ -import { WebSocketServer } from 'ws'; -import { spawn } from 'child_process'; - -const PORT = 7701; -const wss = new WebSocketServer({ port: PORT }); -console.log(`Terminal WS server on ws://localhost:${PORT}`); - -wss.on('connection', (ws) => { - const shell = spawn('/bin/bash', [], { - env: { ...process.env, TERM: 'xterm-256color', COLORTERM: 'truecolor' }, - cwd: process.env.HOME || '/root', - }); - - shell.stdout.on('data', (d) => { try { ws.send(JSON.stringify({ type:'data', data: d.toString('binary') })); } catch{} }); - shell.stderr.on('data', (d) => { try { ws.send(JSON.stringify({ type:'data', data: d.toString('binary') })); } catch{} }); - shell.on('close', (code) => { try { ws.send(JSON.stringify({ type:'exit', code })); ws.close(); } catch{} }); - - ws.on('message', (msg) => { - try { - const m = JSON.parse(msg.toString()); - if (m.type === 'input') shell.stdin.write(m.data); - if (m.type === 'resize') { /* no node-pty resize without pty, best effort */ } - } catch{} - }); - - ws.on('close', () => { shell.kill(); }); -}); diff --git a/packages/web-ui/example/server/tool-server.mjs b/packages/web-ui/example/server/tool-server.mjs index db9a86a..a1ff6bb 100644 --- a/packages/web-ui/example/server/tool-server.mjs +++ b/packages/web-ui/example/server/tool-server.mjs @@ -1,12 +1,13 @@ import http from 'http'; import { exec } from 'child_process'; import { chromium } from 'playwright'; +import { WebSocketServer, WebSocket } from 'ws'; +import { spawn } from 'child_process'; +import url from 'url'; const PORT = parseInt(process.env.TOOL_SERVER_PORT || '7700'); -let browser = null; -let context = null; -let page = null; +// ── CORS ────────────────────────────────────────────────────── const cors = { 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Methods': 'POST, GET, OPTIONS', @@ -24,12 +25,19 @@ function parseBody(req) { }); } +// ── SHARED PLAYWRIGHT BROWSER ───────────────────────────────── +let browser = null; +let context = null; +let page = null; +const browserPanelClients = new Set(); // WS clients watching the browser + async function launchBrowser() { if (!browser) { browser = await chromium.launch({ headless: true, args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage'] }); + console.log('[tool-server] Playwright browser launched'); } return browser; } @@ -50,6 +58,24 @@ async function snap() { return { screenshot: buf.toString('base64'), url: p.url(), title: await p.title() }; } +// Broadcast screenshot to all connected browser panel WebSocket clients +async function broadcastScreenshot() { + if (browserPanelClients.size === 0) return; + try { + const s = await snap(); + const msg = JSON.stringify({ type: 'screenshot', data: s.screenshot, url: s.url, title: s.title }); + for (const ws of browserPanelClients) { + if (ws.readyState === WebSocket.OPEN) ws.send(msg); + } + } catch (e) { + const errMsg = JSON.stringify({ type: 'error', msg: String(e) }); + for (const ws of browserPanelClients) { + if (ws.readyState === WebSocket.OPEN) ws.send(errMsg); + } + } +} + +// ── BASH HANDLER ────────────────────────────────────────────── async function handleBash(body) { const { command, timeout = 30000 } = body; if (!command) return { error: 'No command provided' }; @@ -70,20 +96,25 @@ async function handleBash(body) { }); } +// ── BROWSER HTTP HANDLERS (used by LLM tool) ───────────────── async function handleNavigate(body) { - const { url } = body; - if (!url) return { error: 'No URL' }; + const { url: targetUrl } = body; + if (!targetUrl) return { error: 'No URL' }; const p = await getPage(); - const target = url.startsWith('http') ? url : 'https://' + url; + const target = targetUrl.startsWith('http') ? targetUrl : 'https://' + targetUrl; await p.goto(target, { timeout: 30000, waitUntil: 'domcontentloaded' }); - return snap(); + const result = await snap(); + broadcastScreenshot(); // sync panel + return result; } async function handleClick(body) { const p = await getPage(); await p.mouse.click(body.x || 0, body.y || 0); await p.waitForTimeout(500); - return snap(); + const result = await snap(); + broadcastScreenshot(); + return result; } async function handleType(body) { @@ -91,20 +122,42 @@ async function handleType(body) { if (body.selector) await p.fill(body.selector, body.text || ''); else await p.keyboard.type(body.text || ''); await p.waitForTimeout(300); - return snap(); + const result = await snap(); + broadcastScreenshot(); + return result; } async function handleScroll(body) { const p = await getPage(); await p.mouse.wheel(0, body.dy || 300); await p.waitForTimeout(300); - return snap(); + const result = await snap(); + broadcastScreenshot(); + return result; } async function handleBack() { const p = await getPage(); await p.goBack({ timeout: 10000 }).catch(() => {}); - return snap(); + const result = await snap(); + broadcastScreenshot(); + return result; +} + +async function handleForward() { + const p = await getPage(); + await p.goForward({ timeout: 10000 }).catch(() => {}); + const result = await snap(); + broadcastScreenshot(); + return result; +} + +async function handleReload() { + const p = await getPage(); + await p.reload({ timeout: 15000 }).catch(() => {}); + const result = await snap(); + broadcastScreenshot(); + return result; } async function handleText() { @@ -117,9 +170,11 @@ async function handleEval(body) { const p = await getPage(); const result = await p.evaluate(body.script || 'null'); const ss = await snap(); + broadcastScreenshot(); return { ...ss, evalResult: String(result) }; } +// ── HTTP ROUTES ─────────────────────────────────────────────── const routes = { '/api/bash': handleBash, '/api/browser/navigate': handleNavigate, @@ -127,14 +182,17 @@ const routes = { '/api/browser/type': handleType, '/api/browser/scroll': handleScroll, '/api/browser/back': handleBack, - '/api/browser/screenshot': () => snap(), + '/api/browser/forward': handleForward, + '/api/browser/reload': handleReload, + '/api/browser/screenshot': () => { const r = snap(); broadcastScreenshot(); return r; }, '/api/browser/text': handleText, '/api/browser/eval': handleEval, }; -http.createServer(async (req, res) => { +// ── HTTP SERVER ─────────────────────────────────────────────── +const server = http.createServer(async (req, res) => { if (req.method === 'OPTIONS') { res.writeHead(204, cors); res.end(); return; } - if (req.url === '/health') { res.writeHead(200, cors); res.end(JSON.stringify({ ok: true })); return; } + if (req.url === '/health') { res.writeHead(200, cors); res.end(JSON.stringify({ ok: true, browser: !!browser })); return; } const handler = routes[req.url]; if (req.method === 'POST' && handler) { try { @@ -150,6 +208,113 @@ http.createServer(async (req, res) => { } res.writeHead(404, cors); res.end(JSON.stringify({ error: 'Not found' })); -}).listen(PORT, () => { - console.log(`[tool-server] listening on :${'PORT'}`); +}); + +// ── WEBSOCKET: TERMINAL (/ws/terminal) ──────────────────────── +const terminalWss = new WebSocketServer({ noServer: true }); +terminalWss.on('connection', (ws) => { + console.log('[tool-server] Terminal WS client connected'); + const shell = spawn('/bin/bash', [], { + env: { ...process.env, TERM: 'xterm-256color', COLORTERM: 'truecolor' }, + cwd: process.env.HOME || '/root', + }); + shell.stdout.on('data', (d) => { try { ws.send(JSON.stringify({ type: 'data', data: d.toString('binary') })); } catch {} }); + shell.stderr.on('data', (d) => { try { ws.send(JSON.stringify({ type: 'data', data: d.toString('binary') })); } catch {} }); + shell.on('close', (code) => { try { ws.send(JSON.stringify({ type: 'exit', code })); ws.close(); } catch {} }); + ws.on('message', (msg) => { + try { + const m = JSON.parse(msg.toString()); + if (m.type === 'input') shell.stdin.write(m.data); + if (m.type === 'resize') { /* best effort without node-pty */ } + } catch {} + }); + ws.on('close', () => { shell.kill(); }); +}); + +// ── WEBSOCKET: BROWSER PANEL (/ws/browser) ──────────────────── +const browserWss = new WebSocketServer({ noServer: true }); +browserWss.on('connection', async (ws) => { + console.log('[tool-server] Browser panel WS client connected'); + browserPanelClients.add(ws); + ws.on('close', () => browserPanelClients.delete(ws)); + ws.on('error', () => browserPanelClients.delete(ws)); + + // Handle panel user interactions (navigate, click, scroll, etc.) + ws.on('message', async (msg) => { + try { + const m = JSON.parse(msg.toString()); + if (m.type === 'navigate') { + const p = await getPage(); + const target = m.url.startsWith('http') ? m.url : 'https://' + m.url; + ws.send(JSON.stringify({ type: 'loading' })); + await p.goto(target, { timeout: 30000, waitUntil: 'domcontentloaded' }); + await broadcastScreenshot(); + } + if (m.type === 'click') { + const p = await getPage(); + await p.mouse.click(m.x || 0, m.y || 0); + await p.waitForTimeout(500); + await broadcastScreenshot(); + } + if (m.type === 'scroll') { + const p = await getPage(); + await p.mouse.wheel(0, m.dy || 300); + await p.waitForTimeout(300); + await broadcastScreenshot(); + } + if (m.type === 'type') { + const p = await getPage(); + await p.keyboard.type(m.text || ''); + await p.waitForTimeout(300); + await broadcastScreenshot(); + } + if (m.type === 'back') { + const p = await getPage(); + await p.goBack({ timeout: 10000 }).catch(() => {}); + await broadcastScreenshot(); + } + if (m.type === 'fwd') { + const p = await getPage(); + await p.goForward({ timeout: 10000 }).catch(() => {}); + await broadcastScreenshot(); + } + if (m.type === 'reload') { + const p = await getPage(); + await p.reload({ timeout: 15000 }).catch(() => {}); + await broadcastScreenshot(); + } + if (m.type === 'screenshot') { + await broadcastScreenshot(); + } + } catch (e) { + ws.send(JSON.stringify({ type: 'error', msg: String(e) })); + } + }); + + // Send initial ready + screenshot if browser exists + ws.send(JSON.stringify({ type: 'ready' })); + if (page && !page.isClosed()) { + try { await broadcastScreenshot(); } catch {} + } +}); + +// ── UPGRADE HANDLER (route WS by path) ──────────────────────── +server.on('upgrade', (req, socket, head) => { + const pathname = url.parse(req.url).pathname; + if (pathname === '/ws/terminal') { + terminalWss.handleUpgrade(req, socket, head, (ws) => terminalWss.emit('connection', ws, req)); + } else if (pathname === '/ws/browser') { + browserWss.handleUpgrade(req, socket, head, (ws) => browserWss.emit('connection', ws, req)); + } else { + socket.destroy(); + } +}); + +// ── START ───────────────────────────────────────────────────── +server.listen(PORT, () => { + console.log(`[tool-server] Unified server on http://localhost:${PORT}`); + console.log(`[tool-server] HTTP API: POST /api/bash, /api/browser/*`); + console.log(`[tool-server] Terminal WS: ws://localhost:${PORT}/ws/terminal`); + console.log(`[tool-server] Browser WS: ws://localhost:${PORT}/ws/browser`); + console.log(`[tool-server] Health: GET /health`); }); diff --git a/packages/web-ui/example/src/components/browser-panel.ts b/packages/web-ui/example/src/components/browser-panel.ts index 265516d..0288eb0 100644 --- a/packages/web-ui/example/src/components/browser-panel.ts +++ b/packages/web-ui/example/src/components/browser-panel.ts @@ -32,7 +32,7 @@ export class JaeBrowserPanel extends LitElement { } connect() { - this.ws = new WebSocket("ws://localhost:7702"); + this.ws = new WebSocket("ws://localhost:7700/ws/browser"); this.ws.onopen = () => { this.connected = true; this.requestUpdate(); diff --git a/packages/web-ui/example/src/components/terminal-panel.ts b/packages/web-ui/example/src/components/terminal-panel.ts index 6437706..7d8a81e 100644 --- a/packages/web-ui/example/src/components/terminal-panel.ts +++ b/packages/web-ui/example/src/components/terminal-panel.ts @@ -68,7 +68,7 @@ export class JaeTerminalPanel extends LitElement { this.term.open(this.container); this.fitAddon.fit(); - this.ws = new WebSocket("ws://localhost:7701"); + this.ws = new WebSocket("ws://localhost:7700/ws/terminal"); this.ws.onopen = () => { this.connected = true; this.connecting = false; diff --git a/packages/web-ui/example/src/main.ts b/packages/web-ui/example/src/main.ts index b892f9d..56a3217 100644 --- a/packages/web-ui/example/src/main.ts +++ b/packages/web-ui/example/src/main.ts @@ -315,21 +315,69 @@ const createAgent = async (initialState?: Partial) => { if (agentUnsubscribe) agentUnsubscribe(); agent = new Agent({ initialState: initialState || { - systemPrompt: `You are JAE, a helpful AI coding assistant. Respond in plain text by default. + systemPrompt: `You are JAE, an autonomous AI coding assistant with tool access. You solve tasks methodically using available tools and report results clearly. -CRITICAL: Do NOT call any tools unless the user EXPLICITLY asks for something that requires a tool. Greetings, questions, casual chat, and general conversation require ZERO tools — just reply with plain text. Never create artifacts, files, or run code unless specifically requested. +## Your Role +- Expert coding assistant specialising in software development, debugging, research, and creative tasks +- You have direct access to a Linux terminal, a web browser, web search, image generation, a JavaScript sandbox, text-to-speech, and persistent memory +- You execute tasks yourself — never instruct the user to do something you can do with your tools +- Be conversational for casual chat (no tools needed), but switch to methodical tool use when given a real task -Available tools (use ONLY when explicitly requested): -- bash: Run shell commands -- browser: Browse the web -- web_search: Search the internet -- image_gen: Generate images -- javascript_repl: Run JS code or create HTML artifacts -- tts: Text to speech -- memory: Save/recall information +## Communication Rules +- For greetings, questions, explanations, or casual chat: respond in plain text only, NO tool calls +- For tasks requiring action: think step-by-step, use tools, report results +- Use markdown formatting for structured responses (headers, code blocks, lists, tables) +- Be direct and concise — avoid filler phrases -DO NOT use tools for: "hi", "hello", "how are you", "what is X", "explain Y", "testing", "test", or any general question. -ONLY use tools for: "search for X", "run this code", "create a webpage", "generate an image of X", "open google.com", etc.`, +## Problem-Solving Methodology +When given a task that requires tools: +1. **Analyse**: Break down what needs to be done +2. **Plan**: Outline your approach in 2-3 sentences +3. **Execute**: Use the appropriate tools step by step +4. **Verify**: Check the output and confirm success +5. **Report**: Present results clearly to the user + +Do not give up easily. If a tool fails, try an alternative approach. Be resourceful. + +## Available Tools + +### bash +Execute shell commands on the host system. Use for: file operations, installing packages, running scripts, system tasks, git commands. +- Always check command output for errors +- Use for quick tasks like 'ls', 'cat', 'grep', 'curl', etc. + +### browser +Control a headless Chromium browser. Actions: navigate (url), click (x,y), type (text), scroll (dy), back, screenshot, text (extract page text), eval (run JS in page). +- Navigate first, then interact +- Use 'text' action to read page content for the LLM +- The user can see the browser live in the right panel + +### web_search +Search the internet via DuckDuckGo. Use for: finding current information, researching topics, looking up documentation. +- Search first, then use browser to visit promising results if needed + +### javascript_repl +Run JavaScript code in a sandboxed environment. Use for: calculations, data processing, creating HTML/SVG artifacts that render inline. +- Return HTML strings to create visual artifacts +- Great for charts, diagrams, interactive demos + +### image_gen +Generate images using AI. Use when the user asks to create, draw, or generate an image. + +### tts +Convert text to speech audio. Use when asked to read aloud or generate audio. + +### memory_save / memory_query / memory_delete +Persist information across sessions. Use to save important context, user preferences, or project details. +- Save key facts the user tells you about themselves or their projects +- Query memory when context from past conversations might be relevant + +## Critical Rules +- **NO tools for casual chat**: "hi", "how are you", "what is X", "explain Y" → plain text response +- **YES tools for action requests**: "search for X", "run this code", "open google.com", "create a webpage", "find out about..." → use tools +- Never create files or artifacts unless the user asks for them +- Never run code unless the user asks you to +- If unsure whether to use a tool, respond with text and ask for clarification`, model: getModel("venice", "llama-3.3-70b"), thinkingLevel: "off", messages: [], diff --git a/packages/web-ui/src/index.ts b/packages/web-ui/src/index.ts index 0ffd49d..feae105 100644 --- a/packages/web-ui/src/index.ts +++ b/packages/web-ui/src/index.ts @@ -101,6 +101,8 @@ export { ImageArtifact } from "./tools/artifacts/ImageArtifact.js"; export { MarkdownArtifact } from "./tools/artifacts/MarkdownArtifact.js"; export { SvgArtifact } from "./tools/artifacts/SvgArtifact.js"; export { TextArtifact } from "./tools/artifacts/TextArtifact.js"; +export { bashTool, createBashTool } from "./tools/bash-tool.js"; +export { browserTool, createBrowserTool } from "./tools/browser-tool.js"; export { createExtractDocumentTool, extractDocumentTool } from "./tools/extract-document.js"; export { createImageGenTool, imageGenTool } from "./tools/image-gen.js"; // Tools @@ -119,8 +121,6 @@ export type { ToolRenderer, ToolRenderResult } from "./tools/types.js"; export { createTTSTool, ttsTool } from "./tools/voice-tts.js"; // Venice / community tools export { createWebSearchTool, webSearchTool } from "./tools/web-search.js"; -export { createBashTool, bashTool } from "./tools/bash-tool.js"; -export { createBrowserTool, browserTool } from "./tools/browser-tool.js"; export type { Attachment } from "./utils/attachment-utils.js"; // Utils export { loadAttachment } from "./utils/attachment-utils.js"; diff --git a/packages/web-ui/src/tools/bash-tool.ts b/packages/web-ui/src/tools/bash-tool.ts index f1c8d46..355b1ac 100644 --- a/packages/web-ui/src/tools/bash-tool.ts +++ b/packages/web-ui/src/tools/bash-tool.ts @@ -6,84 +6,86 @@ import { Terminal } from "lucide"; import { registerToolRenderer, renderHeader } from "./renderer-registry.js"; import type { ToolRenderer, ToolRenderResult } from "./types.js"; -const TOOL_SERVER = typeof window !== "undefined" - ? (window as any).__JAE_TOOL_SERVER__ || "http://localhost:7700" - : "http://localhost:7700"; +const TOOL_SERVER = + typeof window !== "undefined" + ? (window as any).__JAE_TOOL_SERVER__ || "http://localhost:7700" + : "http://localhost:7700"; const bashSchema = Type.Object({ - command: Type.String({ description: "Shell command to execute" }), - timeout: Type.Optional(Type.Number({ description: "Timeout in ms (default: 30000)" })), + command: Type.String({ description: "Shell command to execute" }), + timeout: Type.Optional(Type.Number({ description: "Timeout in ms (default: 30000)" })), }); export interface BashDetails { - stdout: string; - stderr: string; - exitCode: number; - command: string; + stdout: string; + stderr: string; + exitCode: number; + command: string; } export const bashTool: AgentTool = { - name: "bash", - label: "Terminal", - description: "Execute a shell command on the server. Use for file operations, installing packages, running scripts, git commands, etc.", - parameters: bashSchema, - async execute(toolCallId, params, signal) { - const { command, timeout = 30000 } = params; - try { - const res = await fetch(TOOL_SERVER + "/api/bash", { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ command, timeout }), - signal, - }); - const data = await res.json() as any; - if (data.error) { - return { - content: [{ type: "text" as const, text: "Error: " + data.error }], - details: { stdout: "", stderr: data.error, exitCode: 1, command }, - }; - } - const output = data.output || data.stdout || ""; - return { - content: [{ type: "text" as const, text: output.slice(0, 10000) || "(no output)" }], - details: { stdout: data.stdout, stderr: data.stderr, exitCode: data.exitCode, command }, - }; - } catch (err: any) { - return { - content: [{ type: "text" as const, text: "Bash tool error: " + err.message }], - details: { stdout: "", stderr: err.message, exitCode: 1, command }, - }; - } - }, + name: "bash", + label: "Terminal", + description: + "Execute a shell command on the server. Use for file operations, installing packages, running scripts, git commands, etc.", + parameters: bashSchema, + async execute(toolCallId, params, signal) { + const { command, timeout = 30000 } = params; + try { + const res = await fetch(TOOL_SERVER + "/api/bash", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ command, timeout }), + signal, + }); + const data = (await res.json()) as any; + if (data.error) { + return { + content: [{ type: "text" as const, text: "Error: " + data.error }], + details: { stdout: "", stderr: data.error, exitCode: 1, command }, + }; + } + const output = data.output || data.stdout || ""; + return { + content: [{ type: "text" as const, text: output.slice(0, 10000) || "(no output)" }], + details: { stdout: data.stdout, stderr: data.stderr, exitCode: data.exitCode, command }, + }; + } catch (err: any) { + return { + content: [{ type: "text" as const, text: "Bash tool error: " + err.message }], + details: { stdout: "", stderr: err.message, exitCode: 1, command }, + }; + } + }, }; class BashToolRenderer implements ToolRenderer<{ command: string }, BashDetails> { - render( - params: { command: string } | undefined, - result: ToolResultMessage | undefined, - ): ToolRenderResult { - const state = result ? (result.isError ? "error" : "complete") : "inprogress"; - const cmd = result?.details?.command || params?.command || "..."; - const exitCode = result?.details?.exitCode; - const icon = state === "error" ? "text-red-500" : exitCode === 0 ? "text-green-500" : ""; - if (result?.details) { - const d = result.details; - return { - content: html` + render( + params: { command: string } | undefined, + result: ToolResultMessage | undefined, + ): ToolRenderResult { + const state = result ? (result.isError ? "error" : "complete") : "inprogress"; + const cmd = result?.details?.command || params?.command || "..."; + const exitCode = result?.details?.exitCode; + const icon = state === "error" ? "text-red-500" : exitCode === 0 ? "text-green-500" : ""; + if (result?.details) { + const d = result.details; + return { + content: html`
${renderHeader(state, Terminal, "$ " + cmd)}
${d.stdout}${d.stderr ? "\nSTDERR: " + d.stderr : ""}
Exit code: ${d.exitCode}
`, - isCustom: false, - }; - } - return { content: renderHeader(state, Terminal, "$ " + cmd), isCustom: false }; - } + isCustom: false, + }; + } + return { content: renderHeader(state, Terminal, "$ " + cmd), isCustom: false }; + } } registerToolRenderer("bash", new BashToolRenderer()); export function createBashTool(): AgentTool { - return bashTool; + return bashTool; } diff --git a/packages/web-ui/src/tools/browser-tool.ts b/packages/web-ui/src/tools/browser-tool.ts index 51c4f6e..81f8dde 100644 --- a/packages/web-ui/src/tools/browser-tool.ts +++ b/packages/web-ui/src/tools/browser-tool.ts @@ -6,111 +6,132 @@ import { Globe } from "lucide"; import { registerToolRenderer, renderHeader } from "./renderer-registry.js"; import type { ToolRenderer, ToolRenderResult } from "./types.js"; -const TOOL_SERVER = typeof window !== "undefined" - ? (window as any).__JAE_TOOL_SERVER__ || "http://localhost:7700" - : "http://localhost:7700"; +const TOOL_SERVER = + typeof window !== "undefined" + ? (window as any).__JAE_TOOL_SERVER__ || "http://localhost:7700" + : "http://localhost:7700"; const browserSchema = Type.Object({ - action: Type.Union([ - Type.Literal("navigate"), - Type.Literal("click"), - Type.Literal("type"), - Type.Literal("scroll"), - Type.Literal("back"), - Type.Literal("screenshot"), - Type.Literal("text"), - Type.Literal("eval"), - ], { description: "Browser action to perform" }), - url: Type.Optional(Type.String({ description: "URL to navigate to" })), - x: Type.Optional(Type.Number({ description: "Click X coordinate" })), - y: Type.Optional(Type.Number({ description: "Click Y coordinate" })), - text: Type.Optional(Type.String({ description: "Text to type" })), - selector: Type.Optional(Type.String({ description: "CSS selector to type into" })), - dy: Type.Optional(Type.Number({ description: "Scroll delta Y pixels" })), - script: Type.Optional(Type.String({ description: "JavaScript to evaluate in page" })), + action: Type.Union( + [ + Type.Literal("navigate"), + Type.Literal("click"), + Type.Literal("type"), + Type.Literal("scroll"), + Type.Literal("back"), + Type.Literal("screenshot"), + Type.Literal("text"), + Type.Literal("eval"), + ], + { description: "Browser action to perform" }, + ), + url: Type.Optional(Type.String({ description: "URL to navigate to" })), + x: Type.Optional(Type.Number({ description: "Click X coordinate" })), + y: Type.Optional(Type.Number({ description: "Click Y coordinate" })), + text: Type.Optional(Type.String({ description: "Text to type" })), + selector: Type.Optional(Type.String({ description: "CSS selector to type into" })), + dy: Type.Optional(Type.Number({ description: "Scroll delta Y pixels" })), + script: Type.Optional(Type.String({ description: "JavaScript to evaluate in page" })), }); export interface BrowserDetails { - action: string; - url?: string; - title?: string; - screenshot?: string; - text?: string; - evalResult?: string; - error?: string; + action: string; + url?: string; + title?: string; + screenshot?: string; + text?: string; + evalResult?: string; + error?: string; } export const browserTool: AgentTool = { - name: "browser", - label: "Browser", - description: "Control a headless browser. Actions: navigate (url), click (x,y), type (text, optional selector), scroll (dy), back, screenshot, text (get page text), eval (run JS).", - parameters: browserSchema, - async execute(toolCallId, params, signal) { - const { action, ...rest } = params; - const endpoint = action === "navigate" ? "/api/browser/navigate" - : action === "click" ? "/api/browser/click" - : action === "type" ? "/api/browser/type" - : action === "scroll" ? "/api/browser/scroll" - : action === "back" ? "/api/browser/back" - : action === "screenshot" ? "/api/browser/screenshot" - : action === "text" ? "/api/browser/text" - : action === "eval" ? "/api/browser/eval" - : null; - if (!endpoint) { - return { - content: [{ type: "text" as const, text: "Unknown action: " + action }], - details: { action, error: "Unknown action" }, - }; - } - try { - const res = await fetch(TOOL_SERVER + endpoint, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify(rest), - signal, - }); - const data = await res.json() as any; - if (data.error) { - return { - content: [{ type: "text" as const, text: "Browser error: " + data.error }], - details: { action, error: data.error }, - }; - } - // Build text response for LLM - let textParts: string[] = []; - if (data.url) textParts.push("URL: " + data.url); - if (data.title) textParts.push("Title: " + data.title); - if (data.text) textParts.push("Page text:\n" + data.text); - if (data.evalResult) textParts.push("Eval result: " + data.evalResult); - if (data.screenshot) textParts.push("[Screenshot captured]"); - if (textParts.length === 0) textParts.push("Action completed."); - // Include screenshot as image content if available - const content: any[] = [{ type: "text" as const, text: textParts.join("\n") }]; - if (data.screenshot) { - content.push({ type: "image" as const, mimeType: "image/jpeg", data: data.screenshot }); - } - return { - content, - details: { action, url: data.url, title: data.title, screenshot: data.screenshot, text: data.text, evalResult: data.evalResult }, - }; - } catch (err: any) { - return { - content: [{ type: "text" as const, text: "Browser tool error: " + err.message }], - details: { action, error: err.message }, - }; - } - }, + name: "browser", + label: "Browser", + description: + "Control a headless browser. Actions: navigate (url), click (x,y), type (text, optional selector), scroll (dy), back, screenshot, text (get page text), eval (run JS).", + parameters: browserSchema, + async execute(toolCallId, params, signal) { + const { action, ...rest } = params; + const endpoint = + action === "navigate" + ? "/api/browser/navigate" + : action === "click" + ? "/api/browser/click" + : action === "type" + ? "/api/browser/type" + : action === "scroll" + ? "/api/browser/scroll" + : action === "back" + ? "/api/browser/back" + : action === "screenshot" + ? "/api/browser/screenshot" + : action === "text" + ? "/api/browser/text" + : action === "eval" + ? "/api/browser/eval" + : null; + if (!endpoint) { + return { + content: [{ type: "text" as const, text: "Unknown action: " + action }], + details: { action, error: "Unknown action" }, + }; + } + try { + const res = await fetch(TOOL_SERVER + endpoint, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(rest), + signal, + }); + const data = (await res.json()) as any; + if (data.error) { + return { + content: [{ type: "text" as const, text: "Browser error: " + data.error }], + details: { action, error: data.error }, + }; + } + // Build text response for LLM + const textParts: string[] = []; + if (data.url) textParts.push("URL: " + data.url); + if (data.title) textParts.push("Title: " + data.title); + if (data.text) textParts.push("Page text:\n" + data.text); + if (data.evalResult) textParts.push("Eval result: " + data.evalResult); + if (data.screenshot) textParts.push("[Screenshot captured]"); + if (textParts.length === 0) textParts.push("Action completed."); + // Include screenshot as image content if available + const content: any[] = [{ type: "text" as const, text: textParts.join("\n") }]; + if (data.screenshot) { + content.push({ type: "image" as const, mimeType: "image/jpeg", data: data.screenshot }); + } + return { + content, + details: { + action, + url: data.url, + title: data.title, + screenshot: data.screenshot, + text: data.text, + evalResult: data.evalResult, + }, + }; + } catch (err: any) { + return { + content: [{ type: "text" as const, text: "Browser tool error: " + err.message }], + details: { action, error: err.message }, + }; + } + }, }; class BrowserToolRenderer implements ToolRenderer { - render(params: any | undefined, result: ToolResultMessage | undefined): ToolRenderResult { - const state = result ? (result.isError ? "error" : "complete") : "inprogress"; - const action = result?.details?.action || params?.action || "..."; - const url = result?.details?.url || params?.url || ""; - const label = url ? action + ": " + url : action; - if (result?.details?.screenshot) { - return { - content: html` + render(params: any | undefined, result: ToolResultMessage | undefined): ToolRenderResult { + const state = result ? (result.isError ? "error" : "complete") : "inprogress"; + const action = result?.details?.action || params?.action || "..."; + const url = result?.details?.url || params?.url || ""; + const label = url ? action + ": " + url : action; + if (result?.details?.screenshot) { + return { + content: html`
${renderHeader(state, Globe, label)} { alt="Browser screenshot" /> ${result.details.title ? html`${result.details.title}` : html``}
`, - isCustom: false, - }; - } - if (result?.details?.text) { - return { - content: html` + isCustom: false, + }; + } + if (result?.details?.text) { + return { + content: html`
${renderHeader(state, Globe, label)}
${result.details.text}
`, - isCustom: false, - }; - } - return { content: renderHeader(state, Globe, label), isCustom: false }; - } + isCustom: false, + }; + } + return { content: renderHeader(state, Globe, label), isCustom: false }; + } } registerToolRenderer("browser", new BrowserToolRenderer()); export function createBrowserTool(): AgentTool { - return browserTool; + return browserTool; } diff --git a/packages/web-ui/src/tools/index.ts b/packages/web-ui/src/tools/index.ts index aa4049a..ef319a5 100644 --- a/packages/web-ui/src/tools/index.ts +++ b/packages/web-ui/src/tools/index.ts @@ -48,10 +48,9 @@ export function renderTool( export { getToolRenderer, registerToolRenderer }; +export { type BashDetails, bashTool, createBashTool } from "./bash-tool.js"; +export { type BrowserDetails, browserTool, createBrowserTool } from "./browser-tool.js"; export { createImageGenTool, type ImageGenDetails, imageGenTool } from "./image-gen.js"; export { createMemoryTools, recallMemoryTool, saveMemoryTool } from "./memory-tool.js"; export { createTTSTool, type TTSDetails, ttsTool } from "./voice-tts.js"; export { createWebSearchTool, type WebSearchDetails, type WebSearchResult, webSearchTool } from "./web-search.js"; - -export { createBashTool, type BashDetails, bashTool } from "./bash-tool.js"; -export { createBrowserTool, type BrowserDetails, browserTool } from "./browser-tool.js";