183 lines
6.2 KiB
TypeScript
183 lines
6.2 KiB
TypeScript
import { describe, expect, it } from "vitest";
|
|
import { getModel } from "../src/models.js";
|
|
import { streamSimple } from "../src/stream.js";
|
|
import type { Api, Context, Model, SimpleStreamOptions } from "../src/types.js";
|
|
import { resolveApiKey } from "./oauth.js";
|
|
|
|
type SimpleOptionsWithExtras = SimpleStreamOptions & Record<string, unknown>;
|
|
|
|
interface RunResult {
|
|
thinkingEventCount: number;
|
|
thinkingCharCount: number;
|
|
text: string;
|
|
outputTokens: number;
|
|
contentTypes: string[];
|
|
}
|
|
|
|
interface DisableExpectations {
|
|
requestOptions?: SimpleOptionsWithExtras;
|
|
minPongs?: number;
|
|
maxOutputTokens?: number;
|
|
}
|
|
|
|
const oauthTokens = await Promise.all([resolveApiKey("google-gemini-cli"), resolveApiKey("google-antigravity")]);
|
|
const [geminiCliToken, antigravityToken] = oauthTokens;
|
|
|
|
function makeContext(): Context {
|
|
return {
|
|
systemPrompt: "You are a precise assistant. Follow the requested output format exactly.",
|
|
messages: [
|
|
{
|
|
role: "user",
|
|
content:
|
|
"Before replying, carefully solve 36863 * 5279 internally. Then reply with the word pong repeated exactly 40 times, separated by single spaces. Do not add any other text.",
|
|
timestamp: Date.now(),
|
|
},
|
|
],
|
|
};
|
|
}
|
|
|
|
function countPongs(text: string): number {
|
|
return text.match(/\bpong\b/gi)?.length ?? 0;
|
|
}
|
|
|
|
async function runWithoutReasoning<TApi extends Api>(
|
|
model: Model<TApi>,
|
|
options: SimpleOptionsWithExtras = {},
|
|
): Promise<RunResult> {
|
|
const s = streamSimple(model, makeContext(), {
|
|
maxTokens: 160,
|
|
temperature: 0,
|
|
...options,
|
|
});
|
|
|
|
let thinkingEventCount = 0;
|
|
let thinkingCharCount = 0;
|
|
|
|
for await (const event of s) {
|
|
if (event.type === "thinking_start" || event.type === "thinking_end") {
|
|
thinkingEventCount += 1;
|
|
}
|
|
if (event.type === "thinking_delta") {
|
|
thinkingEventCount += 1;
|
|
thinkingCharCount += event.delta.length;
|
|
}
|
|
}
|
|
|
|
const response = await s.result();
|
|
expect(response.stopReason, response.errorMessage).toBe("stop");
|
|
|
|
const text = response.content
|
|
.filter((block) => block.type === "text")
|
|
.map((block) => block.text)
|
|
.join("")
|
|
.trim();
|
|
|
|
return {
|
|
thinkingEventCount,
|
|
thinkingCharCount,
|
|
text,
|
|
outputTokens: response.usage.output,
|
|
contentTypes: response.content.map((block) => block.type),
|
|
};
|
|
}
|
|
|
|
async function expectThinkingDisabledE2E<TApi extends Api>(model: Model<TApi>, expectations: DisableExpectations = {}) {
|
|
const result = await runWithoutReasoning(model, expectations.requestOptions);
|
|
|
|
expect(result.thinkingEventCount).toBe(0);
|
|
expect(result.thinkingCharCount).toBe(0);
|
|
expect(result.contentTypes).not.toContain("thinking");
|
|
expect(countPongs(result.text)).toBeGreaterThanOrEqual(expectations.minPongs ?? 35);
|
|
if (expectations.maxOutputTokens !== undefined) {
|
|
expect(result.outputTokens).toBeLessThan(expectations.maxOutputTokens);
|
|
}
|
|
}
|
|
|
|
describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic thinking disable E2E", () => {
|
|
it("disables thinking for budget-based reasoning models", { retry: 2, timeout: 30000 }, async () => {
|
|
await expectThinkingDisabledE2E(getModel("anthropic", "claude-sonnet-4-5"), {
|
|
requestOptions: { maxTokens: 320, temperature: 0 },
|
|
});
|
|
});
|
|
|
|
it("disables thinking for adaptive reasoning models", { retry: 2, timeout: 30000 }, async () => {
|
|
await expectThinkingDisabledE2E(getModel("anthropic", "claude-sonnet-4-6"), {
|
|
requestOptions: { maxTokens: 320, temperature: 0 },
|
|
});
|
|
});
|
|
});
|
|
|
|
describe.skipIf(!process.env.GEMINI_API_KEY)("Google thinking disable E2E", () => {
|
|
it("disables thinking for Gemini 2.5", { retry: 2, timeout: 30000 }, async () => {
|
|
await expectThinkingDisabledE2E(getModel("google", "gemini-2.5-flash"));
|
|
});
|
|
|
|
it("disables thinking for Gemini 3.x", { retry: 2, timeout: 30000 }, async () => {
|
|
await expectThinkingDisabledE2E(getModel("google", "gemini-3-flash-preview"));
|
|
});
|
|
|
|
it("does not error when thinking is off for Gemini 3.1 Pro", { retry: 2, timeout: 30000 }, async () => {
|
|
await expectThinkingDisabledE2E(getModel("google", "gemini-3.1-pro-preview"), {
|
|
requestOptions: { maxTokens: 512 },
|
|
minPongs: 20,
|
|
});
|
|
});
|
|
});
|
|
|
|
describe("Google Vertex thinking disable E2E", () => {
|
|
const vertexProject = process.env.GOOGLE_CLOUD_PROJECT || process.env.GCLOUD_PROJECT;
|
|
const vertexLocation = process.env.GOOGLE_CLOUD_LOCATION;
|
|
const vertexApiKey = process.env.GOOGLE_CLOUD_API_KEY;
|
|
const vertexOptions = vertexApiKey
|
|
? ({ apiKey: vertexApiKey } satisfies SimpleOptionsWithExtras)
|
|
: vertexProject && vertexLocation
|
|
? ({ project: vertexProject, location: vertexLocation } satisfies SimpleOptionsWithExtras)
|
|
: undefined;
|
|
|
|
it.skipIf(!vertexOptions)("disables thinking for Gemini 2.5", { retry: 2, timeout: 30000 }, async () => {
|
|
await expectThinkingDisabledE2E(getModel("google-vertex", "gemini-2.5-flash"), {
|
|
requestOptions: vertexOptions,
|
|
});
|
|
});
|
|
|
|
it.skipIf(!vertexOptions)("disables thinking for Gemini 3.x", { retry: 2, timeout: 30000 }, async () => {
|
|
await expectThinkingDisabledE2E(getModel("google-vertex", "gemini-3-flash-preview"), {
|
|
requestOptions: vertexOptions,
|
|
});
|
|
});
|
|
});
|
|
|
|
describe("Google Gemini CLI thinking disable E2E", () => {
|
|
it.skipIf(!geminiCliToken)("disables thinking for Gemini 2.5", { retry: 2, timeout: 30000 }, async () => {
|
|
await expectThinkingDisabledE2E(getModel("google-gemini-cli", "gemini-2.5-flash"), {
|
|
requestOptions: { apiKey: geminiCliToken! },
|
|
maxOutputTokens: 100,
|
|
});
|
|
});
|
|
});
|
|
|
|
describe("Google Antigravity thinking disable E2E", () => {
|
|
it.skipIf(!antigravityToken)("disables thinking for Gemini 3.x", { retry: 2, timeout: 30000 }, async () => {
|
|
await expectThinkingDisabledE2E(getModel("google-antigravity", "gemini-3-flash"), {
|
|
requestOptions: { apiKey: antigravityToken! },
|
|
maxOutputTokens: 100,
|
|
});
|
|
});
|
|
});
|
|
|
|
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI thinking disable E2E", () => {
|
|
it("disables thinking for Responses reasoning models", { retry: 2, timeout: 30000 }, async () => {
|
|
await expectThinkingDisabledE2E(getModel("openai", "gpt-5.4-mini"), {
|
|
requestOptions: { temperature: undefined },
|
|
});
|
|
});
|
|
});
|
|
|
|
describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter thinking disable E2E", () => {
|
|
it("disables thinking for Qwen 3.5 reasoning models", { retry: 2, timeout: 30000 }, async () => {
|
|
await expectThinkingDisabledE2E(getModel("openrouter", "qwen/qwen3.5-plus-02-15"), {
|
|
maxOutputTokens: 100,
|
|
});
|
|
});
|
|
});
|