feat: 添加对话压缩功能和上下文使用情况显示
- 新增 context 模块实现 Prune 和 Compaction 压缩策略 - Prune: 将旧工具调用结果替换为占位符 - Compaction: 使用 AI 生成对话摘要 - CLI 提示符显示上下文使用量 [used/available] - 添加 /compact 命令手动压缩对话 - 添加 /context 命令查看上下文详情 - Agent 集成自动压缩 (85%阈值) 和强制压缩功能
This commit is contained in:
@@ -0,0 +1,196 @@
|
||||
import { generateText, type ModelMessage, type LanguageModel } from 'ai';
|
||||
import { TokenCounter } from './token-counter.js';
|
||||
import {
|
||||
SUMMARY_MARKER,
|
||||
type CompressionConfig,
|
||||
DEFAULT_COMPRESSION_CONFIG,
|
||||
} from './types.js';
|
||||
|
||||
/**
 * System prompt for the summarization request.
 *
 * Instructs the model to compress the conversation into a concise but
 * complete summary (finished work, current task, relevant files, user
 * constraints, next steps) and to answer in Chinese.
 */
const COMPACTION_SYSTEM_PROMPT = [
  '你是一个专门生成对话摘要的助手。你的任务是将对话历史压缩成一个简洁但信息完整的摘要。',
  '',
  '摘要应该包含:',
  '1. 已完成的工作和关键结果',
  '2. 当前正在进行的任务',
  '3. 涉及的重要文件和代码',
  '4. 用户的关键需求和约束',
  '5. 下一步需要做的事情',
  '',
  '要求:',
  '- 保留关键技术细节(文件路径、函数名、配置等)',
  '- 使用简洁的列表格式',
  '- 不要遗漏重要信息',
  '- 使用中文回复',
].join('\n');
|
||||
|
||||
/**
 * Final user turn appended after the history being summarized; it asks the
 * model to produce the summary that will stand in for that history.
 */
const COMPACTION_USER_PROMPT =
  '请总结上面的对话。这个摘要将是对话继续时唯一可用的上下文,所以要保留关键信息,包括:完成了什么、正在进行的工作、涉及的文件、下一步计划、以及用户的关键需求或约束。要简洁但足够详细,以便工作可以无缝继续。';
|
||||
|
||||
/**
 * Reports whether a message carries a conversation summary.
 *
 * A message counts as a summary when its string content, or the `text` of
 * any part of its array content, contains `SUMMARY_MARKER`.
 *
 * @param message Message to inspect.
 * @returns `true` when the marker is found; `false` otherwise, including for
 *   content that is neither a string nor an array.
 */
export function isSummaryMessage(message: ModelMessage): boolean {
  const { content } = message;

  if (typeof content === 'string') {
    return content.includes(SUMMARY_MARKER);
  }

  if (!Array.isArray(content)) {
    return false;
  }

  return content.some((part) => {
    // `typeof null` is 'object', and `'text' in null` throws a TypeError, so
    // null entries (e.g. from a damaged persisted session) are rejected first.
    if (typeof part !== 'object' || part === null || !('text' in part)) {
      return false;
    }
    const text: unknown = part.text;
    return typeof text === 'string' && text.includes(SUMMARY_MARKER);
  });
}
|
||||
|
||||
/**
 * Wraps summary text in an assistant message.
 *
 * The content starts and ends with `SUMMARY_MARKER`, which is what
 * `isSummaryMessage` looks for.
 *
 * @param summary Summary text to embed.
 * @returns Assistant message containing the marked summary.
 */
function createSummaryMessage(summary: string): ModelMessage {
  const body = `## 对话摘要\n\n${summary}`;
  const content = [SUMMARY_MARKER, body, SUMMARY_MARKER].join('\n');
  return { role: 'assistant', content };
}
|
||||
|
||||
/**
 * Compaction strategy: replaces older history with an AI-generated summary.
 *
 * Steps:
 * 1. Walk backwards from the newest message, protecting messages while their
 *    estimated tokens fit within `config.pruneProtect`. At least 2 messages
 *    are protected, or 1 when `pruneProtect` is 0 (forced mode).
 * 2. Move the split point back past leading `tool` messages so the kept tail
 *    never starts with a tool result whose preceding message was summarized
 *    away.
 * 3. Ask the model to summarize the unprotected prefix (starting from an
 *    existing summary message when the prefix contains one).
 * 4. Return the summary message followed by the protected messages.
 *
 * The input array itself is returned, with `freedTokens: 0`, when there is
 * nothing to compact, when the model call throws, or when the model returns
 * a blank summary.
 *
 * @param messages Conversation history; not mutated.
 * @param model Language model used to generate the summary.
 * @param config Compression settings; only `pruneProtect` is read here.
 * @returns The compacted history and the estimated tokens freed (estimate of
 *   the replaced prefix minus estimate of the summary message; this can be
 *   negative when the summary is larger than what it replaced).
 */
export async function compact(
  messages: ModelMessage[],
  model: LanguageModel,
  config: CompressionConfig = DEFAULT_COMPRESSION_CONFIG
): Promise<{ messages: ModelMessage[]; freedTokens: number }> {
  const { pruneProtect } = config;
  const unchanged = { messages, freedTokens: 0 };

  // Count how many trailing messages fit inside the protected token budget.
  let protectedTokens = 0;
  let protectedCount = 0;
  for (let i = messages.length - 1; i >= 0; i--) {
    const tokens = TokenCounter.estimateMessage(messages[i]);
    if (protectedTokens + tokens > pruneProtect) {
      break;
    }
    protectedTokens += tokens;
    protectedCount++;
  }

  // Always keep a minimum tail (2 normally, 1 in forced mode), but never more
  // messages than exist, so the split index below cannot go negative.
  const minProtected = pruneProtect > 0 ? 2 : 1;
  protectedCount = Math.min(Math.max(protectedCount, minProtected), messages.length);

  // Do not start the kept tail on a tool result: step back until the first
  // kept message is not a `tool` message.
  // NOTE(review): this presumes the message right before a run of `tool`
  // messages is the assistant turn holding the matching tool calls — confirm.
  let splitIndex = messages.length - protectedCount;
  while (splitIndex > 0 && messages[splitIndex]?.role === 'tool') {
    splitIndex--;
  }

  const toCompact = messages.slice(0, splitIndex);
  const toKeep = messages.slice(splitIndex);

  if (toCompact.length === 0) {
    return unchanged;
  }

  // When an earlier summary exists, summarize from it onwards only.
  const existingSummaryIndex = toCompact.findIndex(isSummaryMessage);
  const messagesForSummary =
    existingSummaryIndex >= 0 ? toCompact.slice(existingSummaryIndex) : toCompact;

  const beforeTokens = TokenCounter.estimateMessages(toCompact);

  try {
    const result = await generateText({
      model,
      system: COMPACTION_SYSTEM_PROMPT,
      messages: [
        ...messagesForSummary,
        {
          role: 'user',
          content: COMPACTION_USER_PROMPT,
        },
      ],
      maxOutputTokens: 2000,
    });

    // A blank summary would silently discard the whole prefix; treat it as a
    // failed attempt and leave the history untouched.
    if (result.text.trim().length === 0) {
      console.error('生成摘要失败: 模型返回了空摘要');
      return unchanged;
    }

    const summaryMessage = createSummaryMessage(result.text);
    const afterTokens = TokenCounter.estimateMessage(summaryMessage);

    return {
      messages: [summaryMessage, ...toKeep],
      freedTokens: beforeTokens - afterTokens,
    };
  } catch (error) {
    console.error('生成摘要失败:', error);
    // Best effort: on failure the caller keeps the original history.
    return unchanged;
  }
}
|
||||
|
||||
/**
 * Simple compaction: drops older messages without calling a model and puts a
 * placeholder summary message in their place.
 *
 * Trailing messages are kept while their estimated tokens fit within
 * `config.pruneProtect`; at least 2 messages are kept, or 1 when
 * `pruneProtect` is 0 (forced mode). The kept tail is extended backwards so
 * it never starts with a `tool` message.
 *
 * @param messages Conversation history; not mutated.
 * @param config Compression settings; only `pruneProtect` is read here.
 * @returns The compacted history and the net estimated tokens freed (removed
 *   messages minus the inserted placeholder). The input array itself is
 *   returned with `freedTokens: 0` when nothing would be removed or nothing
 *   would be gained.
 */
export function simpleCompact(
  messages: ModelMessage[],
  config: CompressionConfig = DEFAULT_COMPRESSION_CONFIG
): { messages: ModelMessage[]; freedTokens: number } {
  const { pruneProtect } = config;
  const unchanged = { messages, freedTokens: 0 };

  // Find the oldest trailing message that still fits in the protected budget.
  let keptTokens = 0;
  let keepFromIndex = messages.length;
  for (let i = messages.length - 1; i >= 0; i--) {
    const tokens = TokenCounter.estimateMessage(messages[i]);
    if (keptTokens + tokens > pruneProtect) {
      break;
    }
    keptTokens += tokens;
    keepFromIndex = i;
  }

  // Keep at least `minKeep` messages; clamp at 0 so very short histories do
  // not produce a negative slice index.
  const minKeep = pruneProtect > 0 ? 2 : 1;
  keepFromIndex = Math.max(0, Math.min(keepFromIndex, messages.length - minKeep));

  // Do not start the kept tail on a tool result: step back until the first
  // kept message is not a `tool` message.
  // NOTE(review): presumes the preceding message holds the matching tool
  // calls — confirm against how the agent builds its history.
  while (keepFromIndex > 0 && messages[keepFromIndex]?.role === 'tool') {
    keepFromIndex--;
  }

  if (keepFromIndex === 0) {
    return unchanged;
  }

  const removed = messages.slice(0, keepFromIndex);
  const kept = messages.slice(keepFromIndex);

  const simpleSummary: ModelMessage = {
    role: 'assistant',
    content: `${SUMMARY_MARKER}\n[对话历史已压缩,共移除 ${removed.length} 条消息]\n${SUMMARY_MARKER}`,
  };

  // Report the net gain, as `compact` does: the placeholder itself costs tokens.
  const freedTokens =
    TokenCounter.estimateMessages(removed) - TokenCounter.estimateMessages([simpleSummary]);
  if (freedTokens <= 0) {
    return unchanged;
  }

  return {
    messages: [simpleSummary, ...kept],
    freedTokens,
  };
}
|
||||
@@ -0,0 +1,26 @@
|
||||
// 类型导出
|
||||
export type {
|
||||
TokenUsage,
|
||||
CompressionConfig,
|
||||
CompressionContext,
|
||||
CompressionResult,
|
||||
} from './types.js';
|
||||
|
||||
export {
|
||||
DEFAULT_COMPRESSION_CONFIG,
|
||||
COMPACTED_PLACEHOLDER,
|
||||
SUMMARY_MARKER,
|
||||
COMPACTED_MARKER,
|
||||
} from './types.js';
|
||||
|
||||
// Token 计数器
|
||||
export { TokenCounter } from './token-counter.js';
|
||||
|
||||
// Prune 策略
|
||||
export { prune, filterCompacted } from './prune.js';
|
||||
|
||||
// Compaction 策略
|
||||
export { compact, simpleCompact, isSummaryMessage } from './compaction.js';
|
||||
|
||||
// 压缩管理器
|
||||
export { CompressionManager, compressionManager } from './manager.js';
|
||||
@@ -0,0 +1,238 @@
|
||||
import type { ModelMessage, LanguageModel } from 'ai';
|
||||
import { TokenCounter } from './token-counter.js';
|
||||
import { prune, filterCompacted } from './prune.js';
|
||||
import { compact, simpleCompact, isSummaryMessage } from './compaction.js';
|
||||
import {
|
||||
type TokenUsage,
|
||||
type CompressionConfig,
|
||||
type CompressionResult,
|
||||
DEFAULT_COMPRESSION_CONFIG,
|
||||
} from './types.js';
|
||||
|
||||
/**
 * Compression manager.
 *
 * Single entry point for the context-compression strategies (prune and
 * compaction) and for reporting token usage against the configured limits.
 */
export class CompressionManager {
  private config: CompressionConfig;
  /** Model used for AI summaries; while `null`, compaction uses `simpleCompact`. */
  private model: LanguageModel | null = null;

  /**
   * @param config Overrides merged on top of `DEFAULT_COMPRESSION_CONFIG`.
   */
  constructor(config: Partial<CompressionConfig> = {}) {
    this.config = { ...DEFAULT_COMPRESSION_CONFIG, ...config };
  }

  /** Sets the model used to generate summaries. */
  setModel(model: LanguageModel): void {
    this.model = model;
  }

  /** Returns a shallow copy of the current configuration. */
  getConfig(): CompressionConfig {
    return { ...this.config };
  }

  /** Merges the given fields into the current configuration. */
  updateConfig(config: Partial<CompressionConfig>): void {
    this.config = { ...this.config, ...config };
  }

  /**
   * Computes the estimated token usage of a message list.
   *
   * `available` is `contextLimit - outputReserve`. `usagePercent` is capped
   * at 100; when `available` is not positive (misconfiguration) it is
   * reported as 100 instead of NaN or a negative number.
   */
  calculateUsage(messages: ModelMessage[]): TokenUsage {
    const input = TokenCounter.estimateMessages(messages);
    const { contextLimit, outputReserve } = this.config;
    const available = contextLimit - outputReserve;
    const usagePercent = available > 0 ? Math.min((input / available) * 100, 100) : 100;

    return {
      input,
      contextLimit,
      available,
      usagePercent,
    };
  }

  /** Whether usage has reached `overflowThreshold` (a 0-1 fraction of `available`). */
  shouldCompress(messages: ModelMessage[]): boolean {
    const usage = this.calculateUsage(messages);
    return usage.usagePercent >= this.config.overflowThreshold * 100;
  }

  /** Whether the estimated input already fills the available space. */
  isOverflow(messages: ModelMessage[]): boolean {
    const usage = this.calculateUsage(messages);
    return usage.input >= usage.available;
  }

  /** Runs the prune strategy with the current configuration. */
  prune(messages: ModelMessage[]): { messages: ModelMessage[]; freedTokens: number } {
    return prune(messages, this.config);
  }

  /**
   * Runs the compaction strategy: AI summary when a model is set, otherwise
   * the model-free `simpleCompact`.
   */
  async compact(messages: ModelMessage[]): Promise<{ messages: ModelMessage[]; freedTokens: number }> {
    if (this.model) {
      return compact(messages, this.model, this.config);
    }
    return simpleCompact(messages, this.config);
  }

  /**
   * Automatic compression: prune first, then compact if usage is still at or
   * above the overflow threshold.
   *
   * @returns The resulting messages, total tokens freed, and which strategies
   *   freed tokens (`type` stays `'prune'` when nothing was freed).
   */
  async compress(messages: ModelMessage[]): Promise<CompressionResult> {
    let result = [...messages];
    let totalFreed = 0;
    let type: CompressionResult['type'] = 'prune';

    // Step 1: prune.
    const pruneResult = this.prune(result);
    if (pruneResult.freedTokens > 0) {
      result = pruneResult.messages;
      totalFreed += pruneResult.freedTokens;
    }

    // Step 2: compact only if pruning was not enough.
    if (this.shouldCompress(result)) {
      const compactResult = await this.compact(result);
      if (compactResult.freedTokens > 0) {
        result = compactResult.messages;
        totalFreed += compactResult.freedTokens;
        type = pruneResult.freedTokens > 0 ? 'both' : 'compaction';
      }
    }

    return {
      messages: result,
      freedTokens: totalFreed,
      type,
    };
  }

  /**
   * Forced compression (used by the `/compact` command): runs regardless of
   * the overflow threshold.
   *
   * Histories of 4 messages or fewer are returned untouched. Otherwise the
   * history is pruned with a reduced protection window and then compacted
   * with `pruneProtect: 0`. When a model is set but the AI summary produces
   * nothing (the module-level `compact` reports failure by returning the
   * array it was given) or throws, `simpleCompact` is used as a fallback.
   */
  async forceCompress(messages: ModelMessage[]): Promise<CompressionResult> {
    if (messages.length <= 4) {
      return {
        messages,
        freedTokens: 0,
        type: 'prune',
      };
    }

    let result = [...messages];
    let totalFreed = 0;
    let type: CompressionResult['type'] = 'prune';

    // Prune with forced settings: no minimum gain, and protect at most a
    // quarter of the current estimate (capped at 10k tokens).
    const pruneConfig: CompressionConfig = {
      ...this.config,
      pruneMinimum: 0,
      pruneProtect: Math.min(10_000, TokenCounter.estimateMessages(messages) / 4),
    };
    const pruneResult = prune(result, pruneConfig);
    if (pruneResult.freedTokens > 0) {
      result = pruneResult.messages;
      totalFreed += pruneResult.freedTokens;
    }

    // Forced compaction: protect no tokens.
    const forcedConfig: CompressionConfig = { ...this.config, pruneProtect: 0 };

    let compactResult: { messages: ModelMessage[]; freedTokens: number } | null = null;
    if (this.model) {
      try {
        const attempt = await compact(result, this.model, forcedConfig);
        // `compact` catches its own errors and hands back the input array, so
        // a try/catch alone never sees a failure; compare identities instead.
        if (attempt.messages !== result) {
          compactResult = attempt;
        }
      } catch {
        // AI compaction failed; fall through to the simple strategy below.
      }
    }
    if (compactResult === null) {
      compactResult = simpleCompact(result, forcedConfig);
    }

    if (compactResult.freedTokens > 0) {
      result = compactResult.messages;
      totalFreed += compactResult.freedTokens;
      type = pruneResult.freedTokens > 0 ? 'both' : 'compaction';
    }

    return {
      messages: result,
      freedTokens: totalFreed,
      type,
    };
  }

  /** Filters compacted content; meant to be called before sending messages to the model. */
  filterCompacted(messages: ModelMessage[]): ModelMessage[] {
    return filterCompacted(messages);
  }

  /** Whether the message is a summary message. */
  isSummaryMessage(message: ModelMessage): boolean {
    return isSummaryMessage(message);
  }

  /** Formats usage as `used/available (percent%)` for CLI display. */
  formatUsage(messages: ModelMessage[]): string {
    const usage = this.calculateUsage(messages);
    const used = TokenCounter.format(usage.input);
    const limit = TokenCounter.format(usage.available);
    const percent = usage.usagePercent.toFixed(0);
    return `${used}/${limit} (${percent}%)`;
  }
}
|
||||
|
||||
// 导出单例(可选使用)
|
||||
export const compressionManager = new CompressionManager();
|
||||
@@ -0,0 +1,187 @@
|
||||
import type { ModelMessage } from 'ai';
|
||||
import { TokenCounter } from './token-counter.js';
|
||||
import {
|
||||
COMPACTED_PLACEHOLDER,
|
||||
SUMMARY_MARKER,
|
||||
COMPACTED_MARKER,
|
||||
type CompressionConfig,
|
||||
DEFAULT_COMPRESSION_CONFIG,
|
||||
} from './types.js';
|
||||
|
||||
/**
 * Tool-result content part as seen by the prune strategy, extended with the
 * compaction marker.
 *
 * The payload lives in `output` on AI SDK v5 style parts and in `result` on
 * older (v4 style) parts; both are supported.
 */
interface CompactedToolResult {
  type: 'tool-result';
  toolCallId: string;
  /** Payload of a v4 style tool result. */
  result?: unknown;
  /** Payload of a v5 style tool result. */
  output?: unknown;
  /** Present once the payload has been replaced by the placeholder. */
  [COMPACTED_MARKER]?: {
    compactedAt: number;
    originalSize: number;
  };
}

/**
 * Reports whether a message carries a conversation summary, i.e. whether its
 * string content or the `text` of any array-content part contains
 * `SUMMARY_MARKER`.
 */
function isSummaryMessage(message: ModelMessage): boolean {
  const { content } = message;
  if (typeof content === 'string') {
    return content.includes(SUMMARY_MARKER);
  }
  if (!Array.isArray(content)) {
    return false;
  }
  return content.some((part) => {
    if (typeof part !== 'object' || part === null || !('text' in part)) {
      return false;
    }
    const text = (part as { text?: unknown }).text;
    return typeof text === 'string' && text.includes(SUMMARY_MARKER);
  });
}

/** Type guard: is this content part a tool result? */
function isToolResult(part: unknown): part is CompactedToolResult {
  return (
    typeof part === 'object' &&
    part !== null &&
    (part as { type?: unknown }).type === 'tool-result'
  );
}

/** Whether the part is a tool result that already carries the compaction marker. */
function isCompactedResult(part: unknown): boolean {
  return isToolResult(part) && COMPACTED_MARKER in part;
}

/** Returns the payload of a tool result, whichever field it is stored in. */
function getToolResultPayload(part: CompactedToolResult): unknown {
  return 'output' in part ? part.output : part.result;
}

/**
 * Estimates the tokens of a tool result's payload.
 *
 * @returns 0 for anything that is not a tool result or has no payload.
 */
function getToolResultTokens(part: unknown): number {
  if (!isToolResult(part)) return 0;
  // JSON.stringify yields undefined for an absent payload.
  const serialized: string | undefined = JSON.stringify(getToolResultPayload(part));
  return serialized ? TokenCounter.estimateText(serialized) : 0;
}

/**
 * Returns a copy of the tool result whose payload is replaced by the
 * placeholder and which carries the compaction marker (timestamp and the
 * estimated size of the original payload).
 *
 * A part with an `output` field gets a `{ type: 'text', value }` placeholder
 * there; any other part gets the placeholder string in `result`.
 */
function compactToolResult(part: CompactedToolResult): CompactedToolResult {
  const marker = {
    compactedAt: Date.now(),
    originalSize: getToolResultTokens(part),
  };

  if ('output' in part) {
    return {
      ...part,
      output: { type: 'text', value: COMPACTED_PLACEHOLDER },
      [COMPACTED_MARKER]: marker,
    };
  }

  return {
    ...part,
    result: COMPACTED_PLACEHOLDER,
    [COMPACTED_MARKER]: marker,
  };
}
|
||||
|
||||
/**
 * Prune strategy: replaces old tool-call results with a placeholder.
 *
 * Steps:
 * 1. Walk the messages from newest to oldest.
 * 2. Tool results are protected until their accumulated estimated tokens
 *    exceed `config.pruneProtect`; every result from the one that crosses
 *    the limit onwards is selected.
 * 3. Stop at a summary message, or at the first tool result that is already
 *    compacted (an earlier prune has handled everything older).
 * 4. If the selected results free at least `config.pruneMinimum` tokens,
 *    replace them via `compactToolResult`.
 *
 * The input is never mutated: only the messages that change are copied, so
 * untouched messages (including binary or URL content parts) are shared with
 * the input rather than pushed through a JSON round trip.
 *
 * @param messages Conversation history.
 * @param config Compression settings; `pruneProtect` and `pruneMinimum` are read.
 * @returns The processed messages and the estimated tokens freed. The input
 *   array itself is returned with `freedTokens: 0` when nothing is pruned.
 */
export function prune(
  messages: ModelMessage[],
  config: CompressionConfig = DEFAULT_COMPRESSION_CONFIG
): { messages: ModelMessage[]; freedTokens: number } {
  const { pruneProtect, pruneMinimum } = config;

  let protectedTokens = 0;
  let freedTokens = 0;
  const toPrune: Array<{ msgIndex: number; partIndex: number }> = [];

  // Newest to oldest, messages and parts alike.
  scan: for (let msgIndex = messages.length - 1; msgIndex >= 0; msgIndex--) {
    const message = messages[msgIndex];

    if (isSummaryMessage(message)) {
      break;
    }

    // Only array content can hold tool results.
    const { content } = message;
    if (!Array.isArray(content)) continue;

    for (let partIndex = content.length - 1; partIndex >= 0; partIndex--) {
      const part: unknown = content[partIndex];

      if (!isToolResult(part)) continue;

      // An already-compacted result means an earlier prune covered this part
      // and everything older, so the whole scan ends here.
      if (isCompactedResult(part)) {
        break scan;
      }

      const tokens = getToolResultTokens(part);
      protectedTokens += tokens;

      if (protectedTokens > pruneProtect) {
        toPrune.push({ msgIndex, partIndex });
        freedTokens += tokens;
      }
    }
  }

  // Not worth it: nothing selected, or the gain is below the minimum.
  if (toPrune.length === 0 || freedTokens < pruneMinimum) {
    return { messages, freedTokens: 0 };
  }

  // Copy on write: clone only the messages whose parts are replaced.
  const result = [...messages];
  for (const { msgIndex, partIndex } of toPrune) {
    let message = result[msgIndex];
    if (!Array.isArray(message.content)) continue;

    if (message === messages[msgIndex]) {
      message = { ...message, content: [...message.content] } as ModelMessage;
      result[msgIndex] = message;
    }

    const parts = message.content as unknown[];
    const part = parts[partIndex];
    if (isToolResult(part)) {
      parts[partIndex] = compactToolResult(part);
    }
  }

  return { messages: result, freedTokens };
}
|
||||
|
||||
/**
 * Filters compacted content; meant to be called before messages are sent to
 * the model.
 *
 * Every tool result carrying the compaction marker gets its payload replaced
 * by the placeholder: in `output` (as `{ type: 'text', value }`) when the
 * part has that field, otherwise in `result`. Messages with non-array
 * content are returned as is; messages with array content are returned as
 * new objects.
 *
 * @param messages Messages to filter; not mutated.
 * @returns The filtered message list.
 */
export function filterCompacted(messages: ModelMessage[]): ModelMessage[] {
  return messages.map((message) => {
    if (!Array.isArray(message.content)) return message;

    const filteredContent = message.content.map((part) => {
      if (!isToolResult(part) || !isCompactedResult(part)) {
        return part;
      }
      // Cover both payload fields so no original output leaks through.
      if ('output' in part) {
        return {
          ...part,
          output: { type: 'text', value: COMPACTED_PLACEHOLDER },
        };
      }
      return {
        ...part,
        result: COMPACTED_PLACEHOLDER,
      };
    });

    return {
      ...message,
      content: filteredContent,
    } as ModelMessage;
  });
}
|
||||
@@ -0,0 +1,100 @@
|
||||
import type { ModelMessage } from 'ai';
|
||||
|
||||
/**
 * Token counter.
 *
 * Character-based estimation with no external tokenizer:
 * - characters in U+4E00–U+9FFF: about 1.5 characters per token
 * - every other character: about 4 characters per token
 * The two contributions are added and rounded up.
 */
export class TokenCounter {
  /**
   * Estimates the token count of a piece of text.
   *
   * @returns 0 for empty (or otherwise falsy) text.
   */
  static estimateText(text: string): number {
    if (!text) {
      return 0;
    }

    // Count UTF-16 code units that fall in the CJK range.
    let cjkUnits = 0;
    for (let i = 0; i < text.length; i++) {
      const unit = text.charCodeAt(i);
      if (unit >= 0x4e00 && unit <= 0x9fff) {
        cjkUnits++;
      }
    }
    const remainingUnits = text.length - cjkUnits;

    return Math.ceil(cjkUnits / 1.5 + remainingUnits / 4);
  }

  /**
   * Estimates the token count of message content.
   *
   * String content is estimated directly. For array content each part is
   * estimated by the first rule that matches: plain string, `text` field,
   * `result` field (serialized), `args` field (serialized, plus 20 tokens of
   * overhead for the tool name and framing), or the whole part serialized.
   *
   * @returns 0 when the content is neither a string nor an array.
   */
  static estimateContent(content: ModelMessage['content']): number {
    if (typeof content === 'string') {
      return this.estimateText(content);
    }
    if (!Array.isArray(content)) {
      return 0;
    }

    const serializedCost = (value: unknown): number =>
      this.estimateText(JSON.stringify(value));

    let sum = 0;
    for (const part of content) {
      if (typeof part === 'string') {
        sum += this.estimateText(part);
        continue;
      }
      if ('text' in part && typeof part.text === 'string') {
        sum += this.estimateText(part.text);
        continue;
      }
      if ('result' in part) {
        // tool-result
        sum += serializedCost(part.result);
        continue;
      }
      if ('args' in part) {
        // tool-call, plus overhead for the tool name etc.
        sum += serializedCost(part.args) + 20;
        continue;
      }
      // Any other part type: estimate from its serialized form.
      sum += serializedCost(part);
    }
    return sum;
  }

  /**
   * Estimates the token count of one message: its content plus 4 tokens of
   * role-marker overhead.
   */
  static estimateMessage(message: ModelMessage): number {
    const roleOverhead = 4;
    return roleOverhead + this.estimateContent(message.content);
  }

  /**
   * Estimates the total token count of a message list: the sum of the
   * per-message estimates plus 3 separator tokens per message.
   */
  static estimateMessages(messages: ModelMessage[]): number {
    let sum = 0;
    for (const message of messages) {
      sum += this.estimateMessage(message);
    }
    return sum + messages.length * 3;
  }

  /**
   * Formats a token count for display: values of 1000 and above are shown in
   * thousands with one decimal and a `k` suffix, smaller values verbatim.
   */
  static format(tokens: number): string {
    return tokens >= 1000 ? `${(tokens / 1000).toFixed(1)}k` : `${tokens}`;
  }
}
|
||||
@@ -0,0 +1,77 @@
|
||||
import type { LanguageModel } from 'ai';
|
||||
|
||||
/**
 * Token usage statistics.
 */
export interface TokenUsage {
  /** Estimated input tokens. */
  input: number;
  /** Context limit. */
  contextLimit: number;
  /** Available space (contextLimit - outputReserve). */
  available: number;
  /** Usage percentage (0-100). */
  usagePercent: number;
}

/**
 * Compression settings.
 */
export interface CompressionConfig {
  /** Model context limit (default 200k). */
  contextLimit: number;
  /** Tokens reserved for the output (default 32k). */
  outputReserve: number;
  /** Most recent tokens protected from pruning (default 40k). */
  pruneProtect: number;
  /** Minimum amount that must be freed for a prune to run (default 20k). */
  pruneMinimum: number;
  /** Overflow threshold: automatic compression triggers above this ratio (default 0.85). */
  overflowThreshold: number;
}

/**
 * Result of a compression run.
 */
export interface CompressionResult {
  /** Messages after compression. */
  messages: import('ai').ModelMessage[];
  /** Tokens freed. */
  freedTokens: number;
  /** Which strategy was applied. */
  type: 'prune' | 'compaction' | 'both';
}

/**
 * Compression context (the parameters handed to a compressor).
 */
export interface CompressionContext {
  config: CompressionConfig;
  model?: LanguageModel;
}

/**
 * Default compression settings.
 */
export const DEFAULT_COMPRESSION_CONFIG: CompressionConfig = {
  contextLimit: 200_000,
  outputReserve: 32_000,
  pruneProtect: 40_000,
  pruneMinimum: 20_000,
  overflowThreshold: 0.85,
};

/**
 * Placeholder that stands in for a compacted tool output.
 */
export const COMPACTED_PLACEHOLDER = '[此工具输出已压缩]';

/**
 * Marker that identifies summary messages.
 */
export const SUMMARY_MARKER = '__summary__';

/**
 * Key under which compaction metadata is stored on a tool result.
 */
export const COMPACTED_MARKER = '__compacted__';
|
||||
+61
-1
@@ -12,6 +12,11 @@ import type { Tool, ToolResult, Message, AgentConfig, ProviderType } from '../ty
|
||||
import { buildZodSchema } from '../types/index.js';
|
||||
import { ToolRegistry } from '../tools/registry.js';
|
||||
import { SessionManager } from '../session/index.js';
|
||||
import {
|
||||
CompressionManager,
|
||||
type TokenUsage,
|
||||
type CompressionConfig,
|
||||
} from '../context/index.js';
|
||||
|
||||
// Provider 工厂函数类型
|
||||
type ProviderFactory = (apiKey: string) => (model: string) => LanguageModel;
|
||||
@@ -45,7 +50,10 @@ export class Agent {
|
||||
// 会话管理器(可选)
|
||||
private sessionManager: SessionManager | null = null;
|
||||
|
||||
constructor(config: AgentConfig) {
|
||||
// 压缩管理器
|
||||
private compressionManager: CompressionManager;
|
||||
|
||||
constructor(config: AgentConfig, compressionConfig?: Partial<CompressionConfig>) {
|
||||
this.config = config;
|
||||
|
||||
const providerFactory = providers[config.provider];
|
||||
@@ -53,6 +61,11 @@ export class Agent {
|
||||
throw new Error(`不支持的 provider: ${config.provider}`);
|
||||
}
|
||||
this.getModel = providerFactory(config.apiKey);
|
||||
|
||||
// 初始化压缩管理器
|
||||
this.compressionManager = new CompressionManager(compressionConfig);
|
||||
// 设置模型用于生成摘要
|
||||
this.compressionManager.setModel(this.getModel(config.model));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -230,6 +243,17 @@ export class Agent {
|
||||
// 将完整的响应消息添加到历史(包括工具调用和结果)
|
||||
this.conversationHistory.push(...responseMessages);
|
||||
|
||||
// 检查是否需要自动压缩
|
||||
if (this.compressionManager.shouldCompress(this.conversationHistory)) {
|
||||
const result = await this.compressionManager.compress(this.conversationHistory);
|
||||
if (result.freedTokens > 0) {
|
||||
this.conversationHistory = result.messages;
|
||||
if (onStream) {
|
||||
onStream(`\n[自动压缩: 释放了 ${(result.freedTokens / 1000).toFixed(1)}k tokens]\n`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 持久化会话
|
||||
await this.persistSession();
|
||||
|
||||
@@ -294,4 +318,40 @@ export class Agent {
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Returns the token usage of the current conversation history, as computed
 * by the compression manager.
 */
getContextUsage(): TokenUsage {
  const history = this.conversationHistory;
  return this.compressionManager.calculateUsage(history);
}
|
||||
|
||||
/**
 * Returns the usage of the current conversation history as a display string
 * for the CLI, formatted by the compression manager.
 */
getContextUsageFormatted(): string {
  const history = this.conversationHistory;
  return this.compressionManager.formatUsage(history);
}
|
||||
|
||||
/**
 * Exposes the compression manager instance held by this agent (the instance
 * itself, not a copy).
 */
getCompressionManager(): CompressionManager {
  const { compressionManager } = this;
  return compressionManager;
}
|
||||
|
||||
/**
 * Manually compresses the conversation history (used by the /compact
 * command).
 *
 * The history is replaced and the session persisted only when the forced
 * compression freed tokens.
 *
 * @returns The tokens freed and the strategy type reported by the manager.
 */
async compactHistory(): Promise<{ freedTokens: number; type: string }> {
  const { messages, freedTokens, type } = await this.compressionManager.forceCompress(
    this.conversationHistory
  );

  // Nothing gained: leave history and persisted session as they are.
  if (!(freedTokens > 0)) {
    return { freedTokens, type };
  }

  this.conversationHistory = messages;
  await this.persistSession();
  return { freedTokens, type };
}
|
||||
}
|
||||
|
||||
+70
-8
@@ -28,14 +28,36 @@ export class TerminalUI {
|
||||
console.log(chalk.cyan('║') + chalk.gray(' Powered by DeepSeek / Claude ') + chalk.cyan('║'));
|
||||
console.log(chalk.cyan('╚════════════════════════════════════════╝\n'));
|
||||
console.log(chalk.gray('输入你的问题,或使用以下命令:'));
|
||||
console.log(chalk.yellow(' /help') + chalk.gray(' - 显示帮助'));
|
||||
console.log(chalk.yellow(' /clear') + chalk.gray(' - 清空对话历史'));
|
||||
console.log(chalk.yellow(' /exit') + chalk.gray(' - 退出程序'));
|
||||
console.log(chalk.yellow(' /help') + chalk.gray(' - 显示帮助'));
|
||||
console.log(chalk.yellow(' /clear') + chalk.gray(' - 清空对话历史'));
|
||||
console.log(chalk.yellow(' /compact') + chalk.gray(' - 压缩对话历史'));
|
||||
console.log(chalk.yellow(' /context') + chalk.gray(' - 查看上下文使用情况'));
|
||||
console.log(chalk.yellow(' /exit') + chalk.gray(' - 退出程序'));
|
||||
console.log('');
|
||||
}
|
||||
|
||||
/**
 * Builds the colored `[used/limit]` context indicator shown in the prompt.
 *
 * Color follows the usage percentage: green below 50, yellow below 80, red
 * otherwise. `used` is shown in thousands (one decimal) from 1000 upwards;
 * `limit` is always shown in whole thousands.
 */
private formatContextUsage(): string {
  const { usagePercent, input, available } = this.agent.getContextUsage();

  const colorFn: (text: string) => string =
    usagePercent < 50 ? chalk.green : usagePercent < 80 ? chalk.yellow : chalk.red;

  const used = input >= 1000 ? `${(input / 1000).toFixed(1)}k` : `${input}`;
  const limit = `${(available / 1000).toFixed(0)}k`;

  return colorFn(`[${used}/${limit}]`);
}
|
||||
|
||||
// 处理特殊命令
|
||||
private handleCommand(input: string): boolean {
|
||||
private async handleCommand(input: string): Promise<boolean> {
|
||||
const command = input.toLowerCase().trim();
|
||||
|
||||
switch (command) {
|
||||
@@ -47,14 +69,52 @@ export class TerminalUI {
|
||||
console.log(chalk.gray(' • 搜索代码'));
|
||||
console.log(chalk.gray(' • 回答编程问题'));
|
||||
console.log('');
|
||||
console.log(chalk.white(' 命令:'));
|
||||
console.log(chalk.gray(' • /help - 显示此帮助'));
|
||||
console.log(chalk.gray(' • /clear - 清空对话历史'));
|
||||
console.log(chalk.gray(' • /compact - 压缩对话历史,释放上下文空间'));
|
||||
console.log(chalk.gray(' • /context - 显示当前上下文使用情况'));
|
||||
console.log(chalk.gray(' • /exit - 退出程序'));
|
||||
console.log('');
|
||||
return true;
|
||||
|
||||
case '/clear':
|
||||
// clearHistory 现在是异步的
|
||||
void this.agent.clearHistory();
|
||||
await this.agent.clearHistory();
|
||||
console.log(chalk.green('✓ 对话历史已清空\n'));
|
||||
return true;
|
||||
|
||||
case '/compact':
|
||||
console.log(chalk.yellow('正在压缩对话历史...\n'));
|
||||
try {
|
||||
const beforeUsage = this.agent.getContextUsage();
|
||||
const result = await this.agent.compactHistory();
|
||||
const afterUsage = this.agent.getContextUsage();
|
||||
|
||||
if (result.freedTokens > 0) {
|
||||
console.log(chalk.green(`✓ 压缩完成!`));
|
||||
console.log(chalk.gray(` 策略: ${result.type}`));
|
||||
console.log(chalk.gray(` 释放: ${(result.freedTokens / 1000).toFixed(1)}k tokens`));
|
||||
console.log(chalk.gray(` 之前: ${(beforeUsage.input / 1000).toFixed(1)}k`));
|
||||
console.log(chalk.gray(` 之后: ${(afterUsage.input / 1000).toFixed(1)}k`));
|
||||
} else {
|
||||
console.log(chalk.yellow('没有可压缩的内容'));
|
||||
}
|
||||
console.log('');
|
||||
} catch (error) {
|
||||
console.log(chalk.red(`压缩失败: ${error instanceof Error ? error.message : String(error)}\n`));
|
||||
}
|
||||
return true;
|
||||
|
||||
case '/context':
|
||||
const usage = this.agent.getContextUsage();
|
||||
console.log(chalk.cyan('\n📊 上下文使用情况:'));
|
||||
console.log(chalk.gray(` 已使用: ${(usage.input / 1000).toFixed(1)}k tokens`));
|
||||
console.log(chalk.gray(` 可用: ${(usage.available / 1000).toFixed(0)}k tokens`));
|
||||
console.log(chalk.gray(` 上下文限制: ${(usage.contextLimit / 1000).toFixed(0)}k tokens`));
|
||||
console.log(chalk.gray(` 使用率: ${usage.usagePercent.toFixed(1)}%`));
|
||||
console.log('');
|
||||
return true;
|
||||
|
||||
case '/exit':
|
||||
case '/quit':
|
||||
console.log(chalk.cyan('\n👋 再见!\n'));
|
||||
@@ -74,7 +134,9 @@ export class TerminalUI {
|
||||
return;
|
||||
}
|
||||
|
||||
this.rl.question(chalk.green('You > '), (answer) => {
|
||||
// 显示带上下文使用情况的提示符
|
||||
const contextInfo = this.formatContextUsage();
|
||||
this.rl.question(`${contextInfo} ${chalk.green('You >')} `, (answer) => {
|
||||
resolve(answer ?? '');
|
||||
});
|
||||
});
|
||||
@@ -95,7 +157,7 @@ export class TerminalUI {
|
||||
|
||||
// 处理命令
|
||||
if (input.startsWith('/')) {
|
||||
if (this.handleCommand(input)) {
|
||||
if (await this.handleCommand(input)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user