Agent-JAE/packages/coding-agent/test/test-harness.ts

/**
 * Test harness for AgentSession runtime testing.
 *
 * Provides:
 * - A faux stream function with declarative response sequencing
 * - A one-call factory for a fully wired AgentSession with real in-memory dependencies
 * - Event capture for assertions
 */

import { existsSync, mkdirSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import type { AgentTool } from "@jaeswift/jae-agent-core";
import { Agent } from "@jaeswift/jae-agent-core";
import type {
	AssistantMessage,
	AssistantMessageEvent,
	AssistantMessageEventStream,
	Context,
	Model,
	SimpleStreamOptions,
	StopReason,
	TextContent,
	ThinkingContent,
	ToolCall,
	Usage,
} from "@jaeswift/jae-ai";
import { createAssistantMessageEventStream } from "@jaeswift/jae-ai";
import { AgentSession, type AgentSessionEvent } from "../src/core/agent-session.js";
import { AuthStorage } from "../src/core/auth-storage.js";
import { ModelRegistry } from "../src/core/model-registry.js";
import { SessionManager } from "../src/core/session-manager.js";
import type { Settings } from "../src/core/settings-manager.js";
import { SettingsManager } from "../src/core/settings-manager.js";
import type { ExtensionFactory, ResourceLoader } from "../src/index.js";
import {
	type CreateTestExtensionsResultInput,
	createTestExtensionsResult,
	createTestResourceLoader,
} from "./utilities.js";

// ============================================================================
// Faux model
// ============================================================================

const FAUX_PROVIDER = "faux";
const FAUX_MODEL_ID = "faux-1";
const FAUX_API = "anthropic-messages" as const;

export const fauxModel: Model<typeof FAUX_API> = {
	id: FAUX_MODEL_ID,
	name: "Faux Model",
	api: FAUX_API,
	provider: FAUX_PROVIDER,
	baseUrl: "http://localhost:0",
	reasoning: false,
	input: ["text", "image"],
	cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
	contextWindow: 128000,
	maxTokens: 16384,
};

// ============================================================================
// Response description
// ============================================================================

export interface FauxResponse {
	/** Text content blocks. String shorthand becomes a single text block. */
	text?: string;
	/** Tool calls to include in the response. */
	toolCalls?: Array<{ id?: string; name: string; args: Record<string, unknown> }>;
	/** Thinking content. */
	thinking?: string;
	/** Stop reason. Defaults to "stop", or "toolUse" if toolCalls are present, or "error" if error is set. */
	stopReason?: StopReason;
	/** Error message. Sets stopReason to "error" if not explicitly set. */
	error?: string;
	/** Usage numbers. Merged with defaults (input: 100, output: 50). */
	usage?: Partial<Usage>;
	/** Delay in ms before the response starts. */
	delayMs?: number;
	/** Model overrides (provider, model id) for responses that should look like they came from a different model. */
	model?: { provider?: string; id?: string };
}

/** Shorthand: a string becomes a simple text response. */
export type FauxResponseInput = FauxResponse | string;

// ============================================================================
// Faux stream function
// ============================================================================

function normalizeResponse(input: FauxResponseInput): FauxResponse {
	if (typeof input === "string") {
		return { text: input };
	}
	return input;
}

function buildUsage(partial?: Partial<Usage>): Usage {
	const input = partial?.input ?? 100;
	const output = partial?.output ?? 50;
	const cacheRead = partial?.cacheRead ?? 0;
	const cacheWrite = partial?.cacheWrite ?? 0;
	return {
		input,
		output,
		cacheRead,
		cacheWrite,
		totalTokens: partial?.totalTokens ?? input + output + cacheRead + cacheWrite,
		cost: partial?.cost ?? { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
	};
}

let toolCallIdCounter = 0;

function buildAssistantMessage(resp: FauxResponse): AssistantMessage {
	const content: (TextContent | ThinkingContent | ToolCall)[] = [];

	if (resp.thinking) {
		content.push({ type: "thinking", thinking: resp.thinking });
	}
	if (resp.text !== undefined) {
		content.push({ type: "text", text: resp.text });
	}
	if (resp.toolCalls) {
		for (const tc of resp.toolCalls) {
			content.push({
				type: "toolCall",
				id: tc.id ?? `faux_tc_${++toolCallIdCounter}`,
				name: tc.name,
				arguments: tc.args,
			});
		}
	}

	// If no content was added at all, add empty text
	if (content.length === 0 && !resp.error) {
		content.push({ type: "text", text: "" });
	}

	let stopReason: StopReason;
	if (resp.stopReason) {
		stopReason = resp.stopReason;
	} else if (resp.error) {
		stopReason = "error";
	} else if (resp.toolCalls && resp.toolCalls.length > 0) {
		stopReason = "toolUse";
	} else {
		stopReason = "stop";
	}

	return {
		role: "assistant",
		content,
		api: FAUX_API,
		provider: resp.model?.provider ?? FAUX_PROVIDER,
		model: resp.model?.id ?? FAUX_MODEL_ID,
		usage: buildUsage(resp.usage),
		stopReason,
		errorMessage: resp.error,
		timestamp: Date.now(),
	};
}

// ============================================================================
// Token-level streaming
// ============================================================================

/** Split a string into chunks of varying size (3-5 chars) for simulating token-by-token streaming. */
function chunkString(text: string): string[] {
	const chunks: string[] = [];
	let i = 0;
	while (i < text.length) {
		const size = 3 + Math.floor(Math.random() * 3); // 3, 4, or 5
		chunks.push(text.slice(i, i + size));
		i += size;
	}
	return chunks.length > 0 ? chunks : [""];
}

/**
 * Stream a complete AssistantMessage through an EventStream with realistic
 * intermediate delta events for each content block.
 */
function streamWithDeltas(stream: AssistantMessageEventStream, message: AssistantMessage): void {
	const isError = message.stopReason === "error" || message.stopReason === "aborted";

	// Build partial progressively as we stream content blocks
	const partial: AssistantMessage = { ...message, content: [] };
	stream.push({ type: "start", partial: { ...partial } });

	for (let i = 0; i < message.content.length; i++) {
		const block = message.content[i];

		if (block.type === "thinking") {
			partial.content = [...partial.content, { type: "thinking", thinking: "" }];
			stream.push({ type: "thinking_start", contentIndex: i, partial: { ...partial } });

			for (const chunk of chunkString(block.thinking)) {
				(partial.content[i] as ThinkingContent).thinking += chunk;
				stream.push(makeEvent("thinking_delta", i, chunk, partial));
			}

			stream.push({
				type: "thinking_end",
				contentIndex: i,
				content: block.thinking,
				partial: { ...partial },
			});
		} else if (block.type === "text") {
			partial.content = [...partial.content, { type: "text", text: "" }];
			stream.push({ type: "text_start", contentIndex: i, partial: { ...partial } });

			for (const chunk of chunkString(block.text)) {
				(partial.content[i] as TextContent).text += chunk;
				stream.push(makeEvent("text_delta", i, chunk, partial));
			}

			stream.push({
				type: "text_end",
				contentIndex: i,
				content: block.text,
				partial: { ...partial },
			});
		} else if (block.type === "toolCall") {
			const argsJson = JSON.stringify(block.arguments);
			partial.content = [...partial.content, { type: "toolCall", id: block.id, name: block.name, arguments: {} }];
			stream.push({ type: "toolcall_start", contentIndex: i, partial: { ...partial } });

			for (const chunk of chunkString(argsJson)) {
				stream.push(makeEvent("toolcall_delta", i, chunk, partial));
			}

			// Final toolcall has the real parsed arguments
			(partial.content[i] as ToolCall).arguments = block.arguments;
			stream.push({
				type: "toolcall_end",
				contentIndex: i,
				toolCall: block,
				partial: { ...partial },
			});
		}
	}

	if (isError) {
		stream.push({ type: "error", reason: message.stopReason as "error" | "aborted", error: message });
	} else {
		stream.push({ type: "done", reason: message.stopReason as "stop" | "length" | "toolUse", message });
	}
}

function makeEvent(
	type: "text_delta" | "thinking_delta" | "toolcall_delta",
	contentIndex: number,
	delta: string,
	partial: AssistantMessage,
): AssistantMessageEvent {
	return { type, contentIndex, delta, partial: { ...partial } };
}

// ============================================================================
// Stream function factory
// ============================================================================

export interface FauxStreamFnState {
	/** Number of times the stream function has been called. */
	callCount: number;
	/** The context passed to each call, in order. */
	contexts: Context[];
}

/**
 * Create a faux stream function from a sequence of response descriptions.
 *
 * The function cycles through responses in order. If more calls are made than
 * responses provided, it wraps around.
 *
 * Returns the stream function and a state object for inspection.
 */
export function createFauxStreamFn(responses: FauxResponseInput[]): {
	streamFn: (model: Model<any>, context: Context, options?: SimpleStreamOptions) => AssistantMessageEventStream;
	state: FauxStreamFnState;
} {
	if (responses.length === 0) {
		throw new Error("createFauxStreamFn requires at least one response");
	}

	const state: FauxStreamFnState = { callCount: 0, contexts: [] };

	const streamFn = (_model: Model<any>, context: Context, _options?: SimpleStreamOptions) => {
		const index = state.callCount % responses.length;
		state.callCount++;
		state.contexts.push(context);

		const resp = normalizeResponse(responses[index]);
		const message = buildAssistantMessage(resp);
		const stream = createAssistantMessageEventStream();

		const emit = () => {
			streamWithDeltas(stream, message);
		};

		if (resp.delayMs && resp.delayMs > 0) {
			setTimeout(emit, resp.delayMs);
		} else {
			queueMicrotask(emit);
		}

		return stream;
	};

	return { streamFn, state };
}

// ============================================================================
// Session harness
// ============================================================================

export interface HarnessOptions {
	/** Response sequence for the faux provider. Default: single "ok" response. */
	responses?: FauxResponseInput[];
	/** Model to use. Default: fauxModel. */
	model?: Model<any>;
	/** Context window override (applied to the model). */
	contextWindow?: number;
	/** Settings overrides (retry, compaction, etc.). */
	settings?: Partial<Settings>;
	/** System prompt. Default: "You are a test assistant." */
	systemPrompt?: string;
	/** Custom tools to register on the agent. */
	tools?: AgentTool[];
	/** Base tools override (replaces built-in read/bash/edit/write). */
	baseToolsOverride?: Record<string, AgentTool>;
	/** Optional resource loader override. */
	resourceLoader?: ResourceLoader;
	/** Inline extensions to load into the session resource loader. */
	extensionFactories?: Array<ExtensionFactory | CreateTestExtensionsResultInput>;
}

export interface Harness {
	session: AgentSession;
	agent: Agent;
	sessionManager: SessionManager;
	settingsManager: SettingsManager;
	/** Faux stream function state (call count, captured contexts). */
	faux: FauxStreamFnState;
	/** All events emitted by the session, in order. */
	events: AgentSessionEvent[];
	/** Filter captured events by type. */
	eventsOfType<T extends AgentSessionEvent["type"]>(type: T): Extract<AgentSessionEvent, { type: T }>[];
	/** Temp directory (cleaned up by cleanup()). */
	tempDir: string;
	/** Dispose session and remove temp directory. */
	cleanup: () => void;
}

function createTempDir(): string {
	const tempDir = join(tmpdir(), `pi-harness-${Date.now()}-${Math.random().toString(36).slice(2)}`);
	mkdirSync(tempDir, { recursive: true });
	return tempDir;
}

function createHarnessWithResourceLoader(
	options: HarnessOptions,
	resourceLoader: ResourceLoader,
	tempDir: string,
): Harness {
	const baseModel = options.model ?? fauxModel;
	const model: Model<any> = options.contextWindow ? { ...baseModel, contextWindow: options.contextWindow } : baseModel;

	const { streamFn, state: fauxState } = createFauxStreamFn(options.responses ?? ["ok"]);

	const agent = new Agent({
		getApiKey: () => "faux-key",
		initialState: {
			model,
			systemPrompt: options.systemPrompt ?? "You are a test assistant.",
			tools: options.tools ?? [],
		},
		streamFn,
	});

	const sessionManager = SessionManager.inMemory();
	const settingsManager = SettingsManager.create(tempDir, tempDir);

	if (options.settings) {
		settingsManager.applyOverrides(options.settings);
	}

	const authStorage = AuthStorage.create(join(tempDir, "auth.json"));
	authStorage.setRuntimeApiKey(model.provider, "faux-key");
	const modelRegistry = new ModelRegistry(authStorage, tempDir);

	const session = new AgentSession({
		agent,
		sessionManager,
		settingsManager,
		cwd: tempDir,
		modelRegistry,
		resourceLoader,
		baseToolsOverride: options.baseToolsOverride,
	});

	const events: AgentSessionEvent[] = [];
	session.subscribe((event) => {
		events.push(event);
	});

	const cleanup = () => {
		session.dispose();
		if (existsSync(tempDir)) {
			rmSync(tempDir, { recursive: true });
		}
	};

	return {
		session,
		agent,
		sessionManager,
		settingsManager,
		faux: fauxState,
		events,
		eventsOfType<T extends AgentSessionEvent["type"]>(type: T) {
			return events.filter((e): e is Extract<AgentSessionEvent, { type: T }> => e.type === type);
		},
		tempDir,
		cleanup,
	};
}

export function createHarness(options: HarnessOptions = {}): Harness {
	if (options.extensionFactories?.length) {
		throw new Error("createHarness does not support extensionFactories. Use createHarnessWithExtensions().");
	}

	const tempDir = createTempDir();
	return createHarnessWithResourceLoader(options, options.resourceLoader ?? createTestResourceLoader(), tempDir);
}

export async function createHarnessWithExtensions(options: HarnessOptions = {}): Promise<Harness> {
	const tempDir = createTempDir();
	const extensionsResult = await createTestExtensionsResult(options.extensionFactories ?? [], tempDir);
	const resourceLoader = options.resourceLoader ?? createTestResourceLoader({ extensionsResult });
	return createHarnessWithResourceLoader(options, resourceLoader, tempDir);
}