Files
ai-terminal-assistant/tests/unit/context/token-counter.test.ts
T
kurihada 729fb2d42a feat: 添加完整的单元测试套件
- 新增 vitest 测试框架配置
- 添加 54 个测试文件,共 951 个测试用例
- 覆盖核心模块:
  - Agent: executor, registry, config-loader, permission-merger
  - Context: manager, compaction, prune, token-counter
  - Permission: manager, bash/file/git/web checkers, wildcard
  - Session: manager, storage
  - Tools: filesystem (12个), git (10个), web, shell, todo, task
  - LSP: client, server, language
  - Utils: config, diff
  - UI: terminal
2025-12-11 14:45:24 +08:00

350 lines
11 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { describe, it, expect } from 'vitest';
import { TokenCounter } from '../../../src/context/token-counter.js';
import type { ModelMessage } from 'ai';
describe('TokenCounter - Token 计数器', () => {
// Suite for TokenCounter.estimateText: token estimates for raw strings.
describe('estimateText - 文本估算', () => {
  it('空文本返回 0', () => {
    // The empty string, null and undefined must all estimate to zero tokens.
    const blankInputs = ['', null, undefined] as unknown as string[];
    for (const blank of blankInputs) {
      expect(TokenCounter.estimateText(blank)).toBe(0);
    }
  });

  it('纯英文估算(约 4 字符/token', () => {
    // 39 ASCII characters; the expected estimate is ceil(39 / 4) = 10.
    const sample = 'This is a test message with some words.';
    const estimate = TokenCounter.estimateText(sample);
    const expected = Math.ceil(sample.length / 4);
    expect(estimate).toBe(expected);
  });

  it('纯中文估算(约 1.5 字符/token', () => {
    // 6 Chinese characters; the expected estimate is ceil(6 / 1.5) = 4.
    const sample = '这是测试文本';
    const estimate = TokenCounter.estimateText(sample);
    const expected = Math.ceil(6 / 1.5);
    expect(estimate).toBe(expected);
  });

  it('中英混合估算', () => {
    // 4 Chinese + 8 Latin characters (12 in total). With the ratios asserted
    // by the two tests above that is roughly 4 / 1.5 + 8 / 4 = 4.67, but this
    // test deliberately checks loose bounds only: the estimate is positive
    // and strictly smaller than the character count.
    const sample = '测试test文本text';
    const estimate = TokenCounter.estimateText(sample);
    expect(estimate).toBeGreaterThan(0);
    expect(estimate).toBeLessThan(sample.length);
  });

  it('代码片段估算', () => {
    // A small source snippet: the estimate must be positive and below the
    // raw character count.
    const snippet = `function hello() {
console.log("Hello World");
return true;
}`;
    const estimate = TokenCounter.estimateText(snippet);
    expect(estimate).toBeGreaterThan(0);
    expect(estimate).toBeLessThan(snippet.length);
  });

  it('长文本估算', () => {
    // 10000 ASCII characters -> 10000 / 4 = 2500 tokens expected.
    const repeated = 'a'.repeat(10000);
    const estimate = TokenCounter.estimateText(repeated);
    expect(estimate).toBe(2500);
  });
});
// Suite for TokenCounter.estimateContent: estimates for message content that
// is either a plain string or an array of parts.
describe('estimateContent - 内容估算', () => {
  it('字符串内容', () => {
    // A plain string must be estimated exactly like estimateText.
    const plain = 'Hello World';
    const estimate = TokenCounter.estimateContent(plain);
    expect(estimate).toBe(TokenCounter.estimateText(plain));
  });

  it('数组内容 - 纯文本部分', () => {
    // An array of bare strings: the estimate is the sum of the per-item
    // text estimates.
    const parts = ['Hello', 'World'];
    const estimate = TokenCounter.estimateContent(parts);
    const hello = TokenCounter.estimateText('Hello');
    const world = TokenCounter.estimateText('World');
    expect(estimate).toBe(hello + world);
  });

  it('数组内容 - text 对象', () => {
    // A single { type: 'text' } part counts as its `text` field.
    const parts = [{ type: 'text', text: 'Hello World' }];
    const estimate = TokenCounter.estimateContent(parts);
    expect(estimate).toBe(TokenCounter.estimateText('Hello World'));
  });

  it('数组内容 - tool-result', () => {
    // A tool-result part is expected to count as the JSON serialisation of
    // its `result` payload.
    const payload = { success: true, output: 'file content' };
    const parts = [
      {
        type: 'tool-result',
        toolCallId: 'call_123',
        toolName: 'read_file',
        result: payload,
      },
    ];
    const estimate = TokenCounter.estimateContent(parts);
    const serialised = JSON.stringify(payload);
    expect(estimate).toBe(TokenCounter.estimateText(serialised));
  });

  it('数组内容 - tool-call', () => {
    // A tool-call part is expected to count as the JSON serialisation of its
    // `args` plus a flat overhead of 20 tokens.
    const callArgs = { path: '/test.txt' };
    const parts = [
      {
        type: 'tool-call',
        toolCallId: 'call_123',
        toolName: 'read_file',
        args: callArgs,
      },
    ];
    const estimate = TokenCounter.estimateContent(parts);
    const serialised = JSON.stringify(callArgs);
    expect(estimate).toBe(TokenCounter.estimateText(serialised) + 20);
  });

  it('混合内容', () => {
    // Text and tool-call parts together: only a positive total is asserted.
    const textPart = { type: 'text', text: 'Processing file' };
    const callPart = {
      type: 'tool-call',
      toolCallId: 'call_1',
      toolName: 'read_file',
      args: { path: '/a.txt' },
    };
    const parts = [textPart, callPart];
    expect(TokenCounter.estimateContent(parts)).toBeGreaterThan(0);
  });

  it('空数组返回 0', () => {
    const nothing: never[] = [];
    expect(TokenCounter.estimateContent(nothing)).toBe(0);
  });

  it('非字符串非数组返回 0', () => {
    // Values that are neither a string nor an array must estimate to zero.
    const invalidInputs = [null, 123] as unknown as string[];
    for (const invalid of invalidInputs) {
      expect(TokenCounter.estimateContent(invalid)).toBe(0);
    }
  });
});
// Suite for TokenCounter.estimateMessage: a single message is expected to
// cost a fixed 4-token role overhead plus the estimate of its content.
describe('estimateMessage - 单条消息估算', () => {
  it('用户消息', () => {
    const text = 'Hello';
    const message: ModelMessage = { role: 'user', content: text };
    const estimate = TokenCounter.estimateMessage(message);
    // 4 (role overhead) + content tokens.
    expect(estimate).toBe(4 + TokenCounter.estimateText(text));
  });

  it('助手消息', () => {
    const text = 'I can help you with that.';
    const message: ModelMessage = { role: 'assistant', content: text };
    const estimate = TokenCounter.estimateMessage(message);
    expect(estimate).toBe(4 + TokenCounter.estimateText(text));
  });

  it('系统消息', () => {
    const text = 'You are a helpful assistant.';
    const message: ModelMessage = { role: 'system', content: text };
    const estimate = TokenCounter.estimateMessage(message);
    expect(estimate).toBe(4 + TokenCounter.estimateText(text));
  });

  it('工具消息', () => {
    // NOTE(review): `result` looks like the AI SDK 4 field name, while the
    // `ModelMessage` type name comes from AI SDK 5 (which uses `output`) —
    // confirm against the installed `ai` version and TokenCounter.
    const toolResult = {
      type: 'tool-result',
      toolCallId: 'call_123',
      toolName: 'bash',
      result: { success: true, output: 'done' },
    };
    const message: ModelMessage = {
      role: 'tool',
      content: [toolResult],
    };
    const estimate = TokenCounter.estimateMessage(message);
    // Must exceed the bare 4-token role overhead.
    expect(estimate).toBeGreaterThan(4);
  });
});
// Suite for TokenCounter.estimateMessages: the total for a list is expected
// to be the per-message estimates plus 3 separator tokens per message.
describe('estimateMessages - 消息数组估算', () => {
  it('空数组返回 0', () => {
    const none: ModelMessage[] = [];
    expect(TokenCounter.estimateMessages(none)).toBe(0);
  });

  it('单条消息', () => {
    const only: ModelMessage = { role: 'user', content: 'Hello' };
    const total = TokenCounter.estimateMessages([only]);
    // Message tokens + 3 (separator overhead).
    expect(total).toBe(TokenCounter.estimateMessage(only) + 3);
  });

  it('多条消息', () => {
    const conversation: ModelMessage[] = [
      { role: 'user', content: 'Hello' },
      { role: 'assistant', content: 'Hi there!' },
      { role: 'user', content: 'How are you?' },
    ];
    const total = TokenCounter.estimateMessages(conversation);

    // Sum the individual estimates, then add 3 separator tokens per message.
    let perMessage = 0;
    for (const message of conversation) {
      perMessage += TokenCounter.estimateMessage(message);
    }
    const separators = conversation.length * 3;
    expect(total).toBe(perMessage + separators);
  });

  it('包含工具调用的对话', () => {
    // A user request, an assistant tool call, the tool result and a final
    // assistant answer. Only a positive total is asserted, i.e. the
    // structured parts must be handled without throwing.
    // NOTE(review): `args` / `result` look like AI SDK 4 field names, while
    // `ModelMessage` is the AI SDK 5 type name (`input` / `output`) —
    // confirm against the installed `ai` version and TokenCounter.
    const request: ModelMessage = { role: 'user', content: '读取 /tmp/test.txt' };
    const toolCall: ModelMessage = {
      role: 'assistant',
      content: [
        {
          type: 'tool-call',
          toolCallId: 'call_1',
          toolName: 'read_file',
          args: { path: '/tmp/test.txt' },
        },
      ],
    };
    const toolResult: ModelMessage = {
      role: 'tool',
      content: [
        {
          type: 'tool-result',
          toolCallId: 'call_1',
          toolName: 'read_file',
          result: { success: true, output: 'file content here' },
        },
      ],
    };
    const answer: ModelMessage = {
      role: 'assistant',
      content: '文件内容是: file content here',
    };

    const total = TokenCounter.estimateMessages([request, toolCall, toolResult, answer]);
    expect(total).toBeGreaterThan(0);
  });
});
// Suite for TokenCounter.format: human-readable rendering of a token count.
// Each case is a [token count, expected label] pair.
describe('format - 格式化显示', () => {
  it('小于 1000 显示原数', () => {
    // Counts below 1000 are rendered as the plain number.
    const cases: Array<[number, string]> = [
      [0, '0'],
      [100, '100'],
      [999, '999'],
    ];
    for (const [count, label] of cases) {
      expect(TokenCounter.format(count)).toBe(label);
    }
  });

  it('大于等于 1000 显示 k 单位', () => {
    // From 1000 upwards the value is shown in thousands with one decimal.
    const cases: Array<[number, string]> = [
      [1000, '1.0k'],
      [1500, '1.5k'],
      [10000, '10.0k'],
      [100000, '100.0k'],
    ];
    for (const [count, label] of cases) {
      expect(TokenCounter.format(count)).toBe(label);
    }
  });

  it('小数精度', () => {
    // One decimal place; 1250 is expected to round up to 1.3k.
    const cases: Array<[number, string]> = [
      [1234, '1.2k'],
      [1250, '1.3k'],
      [12345, '12.3k'],
    ];
    for (const [count, label] of cases) {
      expect(TokenCounter.format(count)).toBe(label);
    }
  });
});
});
// Scenario-style checks: these assert plausible ranges for realistic
// conversations rather than exact token counts.
describe('TokenCounter 实际场景测试', () => {
  it('典型对话 token 估算', () => {
    // A system prompt, a Chinese user request and an assistant reply that
    // embeds a fenced Python snippet.
    const systemPrompt =
      'You are a helpful coding assistant. Help users with programming tasks.';
    const userRequest = '请帮我写一个 Python 函数来计算斐波那契数列';
    const assistantReply = `好的,这是一个计算斐波那契数列的 Python 函数:
\`\`\`python
def fibonacci(n):
if n <= 0:
return []
elif n == 1:
return [0]
elif n == 2:
return [0, 1]
fib = [0, 1]
for i in range(2, n):
fib.append(fib[i-1] + fib[i-2])
return fib
\`\`\``;
    const conversation: ModelMessage[] = [
      { role: 'system', content: systemPrompt },
      { role: 'user', content: userRequest },
      { role: 'assistant', content: assistantReply },
    ];

    const total = TokenCounter.estimateMessages(conversation);
    // Expect a non-trivial but bounded total: strictly between 100 and 1000.
    expect(total).toBeGreaterThan(100);
    expect(total).toBeLessThan(1000);
  });

  it('大量工具调用的 token 估算', () => {
    // Simulate 10 rounds, each an assistant tool call followed by its result.
    // NOTE(review): `args` / `result` look like AI SDK 4 field names, while
    // `ModelMessage` is the AI SDK 5 type name (`input` / `output`) —
    // confirm against the installed `ai` version and TokenCounter.
    const history: ModelMessage[] = [];
    for (let round = 0; round < 10; round += 1) {
      const callId = `call_${round}`;
      const call: ModelMessage = {
        role: 'assistant',
        content: [
          {
            type: 'tool-call',
            toolCallId: callId,
            toolName: 'bash',
            args: { command: `echo "iteration ${round}"` },
          },
        ],
      };
      const outcome: ModelMessage = {
        role: 'tool',
        content: [
          {
            type: 'tool-result',
            toolCallId: callId,
            toolName: 'bash',
            result: { success: true, output: `iteration ${round}` },
          },
        ],
      };
      history.push(call, outcome);
    }

    const total = TokenCounter.estimateMessages(history);
    expect(total).toBeGreaterThan(0);
    // The 20-message total must also be formattable.
    const label = TokenCounter.format(total);
    expect(label).toBeDefined();
  });

  it('上下文窗口占用估算', () => {
    // Model a 200k-token context window holding one large user message.
    const maxContextTokens = 200000;
    // 40000 ASCII characters, roughly 10k tokens at 4 characters per token.
    const largeContent = 'a'.repeat(40000);
    const conversation: ModelMessage[] = [{ role: 'user', content: largeContent }];

    const total = TokenCounter.estimateMessages(conversation);
    const usagePercent = (total / maxContextTokens) * 100;
    // Usage must be non-zero and stay under 10% of the window.
    expect(usagePercent).toBeLessThan(10);
    expect(usagePercent).toBeGreaterThan(0);
  });
});