From c6f8ba95ec548aa359745221e5ce645cad2f7200 Mon Sep 17 00:00:00 2001 From: kurihada Date: Thu, 11 Dec 2025 10:59:43 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E5=AF=B9=E8=AF=9D?= =?UTF-8?q?=E5=8E=8B=E7=BC=A9=E5=8A=9F=E8=83=BD=E5=92=8C=E4=B8=8A=E4=B8=8B?= =?UTF-8?q?=E6=96=87=E4=BD=BF=E7=94=A8=E6=83=85=E5=86=B5=E6=98=BE=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增 context 模块实现 Prune 和 Compaction 压缩策略 - Prune: 将旧工具调用结果替换为占位符 - Compaction: 使用 AI 生成对话摘要 - CLI 提示符显示上下文使用量 [used/available] - 添加 /compact 命令手动压缩对话 - 添加 /context 命令查看上下文详情 - Agent 集成自动压缩 (85%阈值) 和强制压缩功能 --- src/context/compaction.ts | 196 +++++++++++++++++++++++++++++ src/context/index.ts | 26 ++++ src/context/manager.ts | 238 +++++++++++++++++++++++++++++++++++ src/context/prune.ts | 187 +++++++++++++++++++++++++++ src/context/token-counter.ts | 100 +++++++++++++++ src/context/types.ts | 77 ++++++++++++ src/core/agent.ts | 62 ++++++++- src/ui/terminal.ts | 78 ++++++++++-- 8 files changed, 955 insertions(+), 9 deletions(-) create mode 100644 src/context/compaction.ts create mode 100644 src/context/index.ts create mode 100644 src/context/manager.ts create mode 100644 src/context/prune.ts create mode 100644 src/context/token-counter.ts create mode 100644 src/context/types.ts diff --git a/src/context/compaction.ts b/src/context/compaction.ts new file mode 100644 index 0000000..3d7e36a --- /dev/null +++ b/src/context/compaction.ts @@ -0,0 +1,196 @@ +import { generateText, type ModelMessage, type LanguageModel } from 'ai'; +import { TokenCounter } from './token-counter.js'; +import { + SUMMARY_MARKER, + type CompressionConfig, + DEFAULT_COMPRESSION_CONFIG, +} from './types.js'; + +/** + * 摘要生成系统提示词 + */ +const COMPACTION_SYSTEM_PROMPT = `你是一个专门生成对话摘要的助手。你的任务是将对话历史压缩成一个简洁但信息完整的摘要。 + +摘要应该包含: +1. 已完成的工作和关键结果 +2. 当前正在进行的任务 +3. 涉及的重要文件和代码 +4. 用户的关键需求和约束 +5. 下一步需要做的事情 + +要求: +- 保留关键技术细节(文件路径、函数名、配置等) +- 使用简洁的列表格式 +- 不要遗漏重要信息 +- 使用中文回复`; + +/** + * 摘要生成用户提示词 + */ +const COMPACTION_USER_PROMPT = `请总结上面的对话。这个摘要将是对话继续时唯一可用的上下文,所以要保留关键信息,包括:完成了什么、正在进行的工作、涉及的文件、下一步计划、以及用户的关键需求或约束。要简洁但足够详细,以便工作可以无缝继续。`; + +/** + * 检查消息是否为摘要消息 + */ +export function isSummaryMessage(message: ModelMessage): boolean { + if (typeof message.content === 'string') { + return message.content.includes(SUMMARY_MARKER); + } + if (Array.isArray(message.content)) { + return message.content.some( + (part) => + typeof part === 'object' && + 'text' in part && + typeof part.text === 'string' && + part.text.includes(SUMMARY_MARKER) + ); + } + return false; +} + +/** + * 创建摘要消息 + */ +function createSummaryMessage(summary: string): ModelMessage { + return { + role: 'assistant', + content: `${SUMMARY_MARKER}\n## 对话摘要\n\n${summary}\n${SUMMARY_MARKER}`, + }; +} + +/** + * Compaction 策略:使用 AI 生成对话摘要 + * + * 逻辑: + * 1. 将历史消息(排除最近保护的部分)发送给 AI + * 2. AI 生成摘要 + * 3. 用摘要消息替换旧消息 + * 4. 保留最近的消息不变 + * + * @param messages 消息数组 + * @param model 语言模型 + * @param config 压缩配置 + * @returns 压缩后的消息数组和释放的 tokens + */ +export async function compact( + messages: ModelMessage[], + model: LanguageModel, + config: CompressionConfig = DEFAULT_COMPRESSION_CONFIG +): Promise<{ messages: ModelMessage[]; freedTokens: number }> { + const { pruneProtect } = config; + + // 计算需要保护的消息数量 + let protectedTokens = 0; + let protectedCount = 0; + + for (let i = messages.length - 1; i >= 0; i--) { + const tokens = TokenCounter.estimateMessage(messages[i]); + if (protectedTokens + tokens > pruneProtect) { + break; + } + protectedTokens += tokens; + protectedCount++; + } + + // 确保至少保护最后 2 条消息(除非 pruneProtect 为 0,表示强制压缩模式) + if (pruneProtect > 0) { + protectedCount = Math.max(protectedCount, 2); + } else { + // 强制压缩模式:至少保护 1 条消息 + protectedCount = Math.max(protectedCount, 1); + } + + // 分割消息:需要压缩的部分 vs 保护的部分 + const toCompact = messages.slice(0, messages.length - protectedCount); + const toKeep = messages.slice(messages.length - protectedCount); + + // 如果没有需要压缩的消息,直接返回 + if (toCompact.length === 0) { + return { messages, freedTokens: 0 }; + } + + // 检查是否已有摘要消息 + const existingSummaryIndex = toCompact.findIndex(isSummaryMessage); + const messagesForSummary = + existingSummaryIndex >= 0 ? toCompact.slice(existingSummaryIndex) : toCompact; + + // 计算压缩前的 tokens + const beforeTokens = TokenCounter.estimateMessages(toCompact); + + try { + // 调用 AI 生成摘要 + const result = await generateText({ + model, + system: COMPACTION_SYSTEM_PROMPT, + messages: [ + ...messagesForSummary, + { + role: 'user', + content: COMPACTION_USER_PROMPT, + }, + ], + maxOutputTokens: 2000, + }); + + const summaryMessage = createSummaryMessage(result.text); + const afterTokens = TokenCounter.estimateMessage(summaryMessage); + + // 返回:摘要 + 保护的消息 + return { + messages: [summaryMessage, ...toKeep], + freedTokens: beforeTokens - afterTokens, + }; + } catch (error) { + console.error('生成摘要失败:', error); + // 失败时返回原消息 + return { messages, freedTokens: 0 }; + } +} + +/** + * 简单压缩:不使用 AI,直接截断旧消息 + * 用于没有模型可用或快速压缩的场景 + */ +export function simpleCompact( + messages: ModelMessage[], + config: CompressionConfig = DEFAULT_COMPRESSION_CONFIG +): { messages: ModelMessage[]; freedTokens: number } { + const { pruneProtect } = config; + + // 计算需要保留的消息 + let keptTokens = 0; + let keepFromIndex = messages.length; + + for (let i = messages.length - 1; i >= 0; i--) { + const tokens = TokenCounter.estimateMessage(messages[i]); + if (keptTokens + tokens > pruneProtect) { + break; + } + keptTokens += tokens; + keepFromIndex = i; + } + + // 确保至少保留最后 N 条消息(强制模式下保留 1 条,否则保留 2 条) + const minKeep = pruneProtect > 0 ? 2 : 1; + keepFromIndex = Math.min(keepFromIndex, messages.length - minKeep); + + const removed = messages.slice(0, keepFromIndex); + const kept = messages.slice(keepFromIndex); + + if (removed.length === 0) { + return { messages, freedTokens: 0 }; + } + + // 创建简单摘要 + const simpleSummary: ModelMessage = { + role: 'assistant', + content: `${SUMMARY_MARKER}\n[对话历史已压缩,共移除 ${removed.length} 条消息]\n${SUMMARY_MARKER}`, + }; + + const freedTokens = TokenCounter.estimateMessages(removed); + + return { + messages: [simpleSummary, ...kept], + freedTokens, + }; +} diff --git a/src/context/index.ts b/src/context/index.ts new file mode 100644 index 0000000..83095f8 --- /dev/null +++ b/src/context/index.ts @@ -0,0 +1,26 @@ +// 类型导出 +export type { + TokenUsage, + CompressionConfig, + CompressionContext, + CompressionResult, +} from './types.js'; + +export { + DEFAULT_COMPRESSION_CONFIG, + COMPACTED_PLACEHOLDER, + SUMMARY_MARKER, + COMPACTED_MARKER, +} from './types.js'; + +// Token 计数器 +export { TokenCounter } from './token-counter.js'; + +// Prune 策略 +export { prune, filterCompacted } from './prune.js'; + +// Compaction 策略 +export { compact, simpleCompact, isSummaryMessage } from './compaction.js'; + +// 压缩管理器 +export { CompressionManager, compressionManager } from './manager.js'; diff --git a/src/context/manager.ts b/src/context/manager.ts new file mode 100644 index 0000000..83b7bc6 --- /dev/null +++ b/src/context/manager.ts @@ -0,0 +1,238 @@ +import type { ModelMessage, LanguageModel } from 'ai'; +import { TokenCounter } from './token-counter.js'; +import { prune, filterCompacted } from './prune.js'; +import { compact, simpleCompact, isSummaryMessage } from './compaction.js'; +import { + type TokenUsage, + type CompressionConfig, + type CompressionResult, + DEFAULT_COMPRESSION_CONFIG, +} from './types.js'; + +/** + * 压缩管理器 + * 统一管理对话上下文的压缩策略 + */ +export class CompressionManager { + private config: CompressionConfig; + private model: LanguageModel | null = null; + + constructor(config: Partial = {}) { + this.config = { ...DEFAULT_COMPRESSION_CONFIG, ...config }; + } + + /** + * 设置用于生成摘要的模型 + */ + setModel(model: LanguageModel): void { + this.model = model; + } + + /** + * 获取当前配置 + */ + getConfig(): CompressionConfig { + return { ...this.config }; + } + + /** + * 更新配置 + */ + updateConfig(config: Partial): void { + this.config = { ...this.config, ...config }; + } + + /** + * 计算消息数组的 token 使用情况 + */ + calculateUsage(messages: ModelMessage[]): TokenUsage { + const input = TokenCounter.estimateMessages(messages); + const { contextLimit, outputReserve } = this.config; + const available = contextLimit - outputReserve; + const usagePercent = (input / available) * 100; + + return { + input, + contextLimit, + available, + usagePercent: Math.min(usagePercent, 100), + }; + } + + /** + * 检查是否需要压缩(超过溢出阈值) + */ + shouldCompress(messages: ModelMessage[]): boolean { + const usage = this.calculateUsage(messages); + return usage.usagePercent >= this.config.overflowThreshold * 100; + } + + /** + * 检查是否溢出(超过可用空间) + */ + isOverflow(messages: ModelMessage[]): boolean { + const usage = this.calculateUsage(messages); + return usage.input >= usage.available; + } + + /** + * 执行 prune 策略 + */ + prune(messages: ModelMessage[]): { messages: ModelMessage[]; freedTokens: number } { + return prune(messages, this.config); + } + + /** + * 执行 compaction 策略 + */ + async compact(messages: ModelMessage[]): Promise<{ messages: ModelMessage[]; freedTokens: number }> { + if (this.model) { + return compact(messages, this.model, this.config); + } + // 没有模型时使用简单压缩 + return simpleCompact(messages, this.config); + } + + /** + * 自动压缩:先 prune,不够再 compact + */ + async compress(messages: ModelMessage[]): Promise { + let result = [...messages]; + let totalFreed = 0; + let type: CompressionResult['type'] = 'prune'; + + // 第一步:尝试 prune + const pruneResult = this.prune(result); + if (pruneResult.freedTokens > 0) { + result = pruneResult.messages; + totalFreed += pruneResult.freedTokens; + } + + // 检查是否还需要进一步压缩 + if (this.shouldCompress(result)) { + // 第二步:执行 compaction + const compactResult = await this.compact(result); + if (compactResult.freedTokens > 0) { + result = compactResult.messages; + totalFreed += compactResult.freedTokens; + type = pruneResult.freedTokens > 0 ? 'both' : 'compaction'; + } + } + + return { + messages: result, + freedTokens: totalFreed, + type, + }; + } + + /** + * 强制压缩(用于 /compact 命令) + * 无论是否达到阈值都执行压缩 + */ + async forceCompress(messages: ModelMessage[]): Promise { + // 消息数量太少时不压缩(至少需要 4 条消息) + if (messages.length <= 4) { + return { + messages, + freedTokens: 0, + type: 'prune', + }; + } + + let result = [...messages]; + let totalFreed = 0; + let type: CompressionResult['type'] = 'prune'; + + // 先尝试 prune(使用强制配置) + const pruneConfig: CompressionConfig = { + ...this.config, + pruneMinimum: 0, + pruneProtect: Math.min(10_000, TokenCounter.estimateMessages(messages) / 4), + }; + + const pruneResult = prune(result, pruneConfig); + + if (pruneResult.freedTokens > 0) { + result = pruneResult.messages; + totalFreed += pruneResult.freedTokens; + } + + // 强制 compaction:只保留最后 2 条消息 + // 计算保留消息的 tokens + const keepCount = Math.min(2, result.length - 1); + const toKeep = result.slice(-keepCount); + const toCompact = result.slice(0, result.length - keepCount); + + if (toCompact.length > 0) { + if (this.model) { + try { + const compactResult = await compact(result, this.model, { + ...this.config, + pruneProtect: 0, // 强制模式:不保护任何 tokens + }); + if (compactResult.freedTokens > 0) { + result = compactResult.messages; + totalFreed += compactResult.freedTokens; + type = pruneResult.freedTokens > 0 ? 'both' : 'compaction'; + } + } catch { + // AI 压缩失败,使用简单压缩 + const compactResult = simpleCompact(result, { + ...this.config, + pruneProtect: 0, + }); + if (compactResult.freedTokens > 0) { + result = compactResult.messages; + totalFreed += compactResult.freedTokens; + type = pruneResult.freedTokens > 0 ? 'both' : 'compaction'; + } + } + } else { + const compactResult = simpleCompact(result, { + ...this.config, + pruneProtect: 0, + }); + if (compactResult.freedTokens > 0) { + result = compactResult.messages; + totalFreed += compactResult.freedTokens; + type = pruneResult.freedTokens > 0 ? 'both' : 'compaction'; + } + } + } + + return { + messages: result, + freedTokens: totalFreed, + type, + }; + } + + /** + * 过滤已压缩的内容(发送给模型前调用) + */ + filterCompacted(messages: ModelMessage[]): ModelMessage[] { + return filterCompacted(messages); + } + + /** + * 检查消息是否为摘要消息 + */ + isSummaryMessage(message: ModelMessage): boolean { + return isSummaryMessage(message); + } + + /** + * 格式化 token 使用情况(用于 CLI 显示) + */ + formatUsage(messages: ModelMessage[]): string { + const usage = this.calculateUsage(messages); + const used = TokenCounter.format(usage.input); + const limit = TokenCounter.format(usage.available); + const percent = usage.usagePercent.toFixed(0); + return `${used}/${limit} (${percent}%)`; + } +} + +// 导出单例(可选使用) +export const compressionManager = new CompressionManager(); diff --git a/src/context/prune.ts b/src/context/prune.ts new file mode 100644 index 0000000..53e7dc5 --- /dev/null +++ b/src/context/prune.ts @@ -0,0 +1,187 @@ +import type { ModelMessage } from 'ai'; +import { TokenCounter } from './token-counter.js'; +import { + COMPACTED_PLACEHOLDER, + SUMMARY_MARKER, + COMPACTED_MARKER, + type CompressionConfig, + DEFAULT_COMPRESSION_CONFIG, +} from './types.js'; + +// 扩展的工具结果类型,支持压缩标记 +interface CompactedToolResult { + type: 'tool-result'; + toolCallId: string; + result: unknown; + [COMPACTED_MARKER]?: { + compactedAt: number; + originalSize: number; + }; +} + +/** + * 检查消息是否为摘要消息 + */ +function isSummaryMessage(message: ModelMessage): boolean { + if (typeof message.content === 'string') { + return message.content.includes(SUMMARY_MARKER); + } + if (Array.isArray(message.content)) { + return message.content.some( + (part) => + typeof part === 'object' && + part !== null && + 'text' in part && + typeof (part as { text?: unknown }).text === 'string' && + ((part as { text: string }).text).includes(SUMMARY_MARKER) + ); + } + return false; +} + +/** + * 检查是否为工具结果 + */ +function isToolResult(part: unknown): part is CompactedToolResult { + return ( + typeof part === 'object' && + part !== null && + (part as { type?: unknown }).type === 'tool-result' + ); +} + +/** + * 检查工具结果是否已压缩 + */ +function isCompactedResult(part: unknown): boolean { + if (!isToolResult(part)) return false; + return COMPACTED_MARKER in part; +} + +/** + * 获取工具结果的 token 数量 + */ +function getToolResultTokens(part: unknown): number { + if (!isToolResult(part)) return 0; + return TokenCounter.estimateText(JSON.stringify(part.result)); +} + +/** + * 压缩工具结果 + */ +function compactToolResult(part: CompactedToolResult): CompactedToolResult { + const originalSize = TokenCounter.estimateText(JSON.stringify(part.result)); + return { + ...part, + result: COMPACTED_PLACEHOLDER, + [COMPACTED_MARKER]: { + compactedAt: Date.now(), + originalSize, + }, + }; +} + +/** + * Prune 策略:压缩旧的工具调用结果 + * + * 逻辑: + * 1. 从后往前遍历消息 + * 2. 跳过最近 pruneProtect tokens 的工具结果 + * 3. 遇到 summary 消息停止 + * 4. 将超出保护范围的 tool-result 替换为占位符 + * + * @param messages 消息数组 + * @param config 压缩配置 + * @returns 处理后的消息数组和释放的 tokens + */ +export function prune( + messages: ModelMessage[], + config: CompressionConfig = DEFAULT_COMPRESSION_CONFIG +): { messages: ModelMessage[]; freedTokens: number } { + const { pruneProtect, pruneMinimum } = config; + + // 深拷贝消息数组 + const result = JSON.parse(JSON.stringify(messages)) as ModelMessage[]; + + let protectedTokens = 0; + let freedTokens = 0; + const toPrune: Array<{ msgIndex: number; partIndex: number; tokens: number }> = []; + + // 从后往前遍历 + for (let msgIndex = result.length - 1; msgIndex >= 0; msgIndex--) { + const message = result[msgIndex]; + + // 遇到摘要消息停止 + if (isSummaryMessage(message)) { + break; + } + + // 只处理包含工具结果的消息 + if (!Array.isArray(message.content)) continue; + + for (let partIndex = message.content.length - 1; partIndex >= 0; partIndex--) { + const part = message.content[partIndex]; + + // 跳过非工具结果 + if (!isToolResult(part)) continue; + + // 跳过已压缩的 + if (isCompactedResult(part)) { + break; // 遇到已压缩的,说明之前已经 prune 过,停止 + } + + const tokens = getToolResultTokens(part); + protectedTokens += tokens; + + // 超出保护范围的标记为待压缩 + if (protectedTokens > pruneProtect) { + toPrune.push({ msgIndex, partIndex, tokens }); + freedTokens += tokens; + } + } + } + + // 如果释放的 tokens 不够最小量,不执行压缩 + if (freedTokens < pruneMinimum) { + return { messages, freedTokens: 0 }; + } + + // 执行压缩 + for (const { msgIndex, partIndex } of toPrune) { + const message = result[msgIndex]; + if (Array.isArray(message.content)) { + const part = message.content[partIndex]; + if (isToolResult(part)) { + // 使用 any 来绕过严格类型检查,因为我们在运行时知道这是安全的 + (message.content as unknown[])[partIndex] = compactToolResult(part); + } + } + } + + return { messages: result, freedTokens }; +} + +/** + * 过滤已压缩的内容(用于发送给模型前) + * 将已压缩的工具结果替换为占位符文本 + */ +export function filterCompacted(messages: ModelMessage[]): ModelMessage[] { + return messages.map((message) => { + if (!Array.isArray(message.content)) return message; + + const filteredContent = message.content.map((part) => { + if (isCompactedResult(part) && isToolResult(part)) { + return { + ...part, + result: COMPACTED_PLACEHOLDER, + }; + } + return part; + }); + + return { + ...message, + content: filteredContent, + } as ModelMessage; + }); +} diff --git a/src/context/token-counter.ts b/src/context/token-counter.ts new file mode 100644 index 0000000..d8c36ad --- /dev/null +++ b/src/context/token-counter.ts @@ -0,0 +1,100 @@ +import type { ModelMessage } from 'ai'; + +/** + * Token 计数器 + * 使用简单的字符估算,不依赖外部库 + * 估算规则: + * - 中文字符:约 1.5 字符/token + * - 英文/数字:约 4 字符/token + * - 混合内容取平均 + */ +export class TokenCounter { + /** + * 估算文本的 token 数量 + */ + static estimateText(text: string): number { + if (!text) return 0; + + // 统计中文字符数量 + const chineseChars = (text.match(/[\u4e00-\u9fff]/g) || []).length; + // 其他字符数量 + const otherChars = text.length - chineseChars; + + // 中文约 1.5 字符/token,其他约 4 字符/token + const chineseTokens = chineseChars / 1.5; + const otherTokens = otherChars / 4; + + return Math.ceil(chineseTokens + otherTokens); + } + + /** + * 估算消息内容的 token 数量 + */ + static estimateContent(content: ModelMessage['content']): number { + if (typeof content === 'string') { + return this.estimateText(content); + } + + if (Array.isArray(content)) { + let total = 0; + for (const part of content) { + if (typeof part === 'string') { + total += this.estimateText(part); + } else if ('text' in part && typeof part.text === 'string') { + total += this.estimateText(part.text); + } else if ('result' in part) { + // tool-result + total += this.estimateText(JSON.stringify(part.result)); + } else if ('args' in part) { + // tool-call + total += this.estimateText(JSON.stringify(part.args)); + total += 20; // 工具名称等开销 + } else { + // 其他类型,序列化估算 + total += this.estimateText(JSON.stringify(part)); + } + } + return total; + } + + return 0; + } + + /** + * 估算单条消息的 token 数量 + */ + static estimateMessage(message: ModelMessage): number { + let tokens = 0; + + // 角色标记开销 + tokens += 4; + + // 内容 + tokens += this.estimateContent(message.content); + + return tokens; + } + + /** + * 估算消息数组的总 token 数量 + */ + static estimateMessages(messages: ModelMessage[]): number { + let total = 0; + for (const message of messages) { + total += this.estimateMessage(message); + } + // 消息间分隔开销 + total += messages.length * 3; + return total; + } + + /** + * 格式化 token 数量显示 + */ + static format(tokens: number): string { + if (tokens >= 1000) { + return `${(tokens / 1000).toFixed(1)}k`; + } + return `${tokens}`; + } +} diff --git a/src/context/types.ts b/src/context/types.ts new file mode 100644 index 0000000..b7e67b2 --- /dev/null +++ b/src/context/types.ts @@ -0,0 +1,77 @@ +import type { LanguageModel } from 'ai'; + +/** + * Token 使用统计 + */ +export interface TokenUsage { + /** 输入 tokens(估算) */ + input: number; + /** 上下文限制 */ + contextLimit: number; + /** 可用空间(contextLimit - outputReserve) */ + available: number; + /** 使用百分比 (0-100) */ + usagePercent: number; +} + +/** + * 压缩配置 + */ +export interface CompressionConfig { + /** 模型上下文限制 (默认 200k) */ + contextLimit: number; + /** 预留输出 tokens (默认 32k) */ + outputReserve: number; + /** 保护最近 tokens 不被 prune (默认 40k) */ + pruneProtect: number; + /** 最小清理量才执行 prune (默认 20k) */ + pruneMinimum: number; + /** 溢出阈值,超过此比例触发自动压缩 (默认 0.85) */ + overflowThreshold: number; +} + +/** + * 默认压缩配置 + */ +export const DEFAULT_COMPRESSION_CONFIG: CompressionConfig = { + contextLimit: 200_000, + outputReserve: 32_000, + pruneProtect: 40_000, + pruneMinimum: 20_000, + overflowThreshold: 0.85, +}; + +/** + * 压缩占位符 + */ +export const COMPACTED_PLACEHOLDER = '[此工具输出已压缩]'; + +/** + * 摘要消息标记 key + */ +export const SUMMARY_MARKER = '__summary__'; + +/** + * 工具结果压缩标记 key + */ +export const COMPACTED_MARKER = '__compacted__'; + +/** + * 压缩上下文(传递给压缩器的参数) + */ +export interface CompressionContext { + config: CompressionConfig; + model?: LanguageModel; +} + +/** + * 压缩结果 + */ +export interface CompressionResult { + /** 压缩后的消息 */ + messages: import('ai').ModelMessage[]; + /** 释放的 tokens */ + freedTokens: number; + /** 压缩类型 */ + type: 'prune' | 'compaction' | 'both'; +} diff --git a/src/core/agent.ts b/src/core/agent.ts index c2b236e..8527d16 100644 --- a/src/core/agent.ts +++ b/src/core/agent.ts @@ -12,6 +12,11 @@ import type { Tool, ToolResult, Message, AgentConfig, ProviderType } from '../ty import { buildZodSchema } from '../types/index.js'; import { ToolRegistry } from '../tools/registry.js'; import { SessionManager } from '../session/index.js'; +import { + CompressionManager, + type TokenUsage, + type CompressionConfig, +} from '../context/index.js'; // Provider 工厂函数类型 type ProviderFactory = (apiKey: string) => (model: string) => LanguageModel; @@ -45,7 +50,10 @@ export class Agent { // 会话管理器(可选) private sessionManager: SessionManager | null = null; - constructor(config: AgentConfig) { + // 压缩管理器 + private compressionManager: CompressionManager; + + constructor(config: AgentConfig, compressionConfig?: Partial) { this.config = config; const providerFactory = providers[config.provider]; @@ -53,6 +61,11 @@ export class Agent { throw new Error(`不支持的 provider: ${config.provider}`); } this.getModel = providerFactory(config.apiKey); + + // 初始化压缩管理器 + this.compressionManager = new CompressionManager(compressionConfig); + // 设置模型用于生成摘要 + this.compressionManager.setModel(this.getModel(config.model)); } /** @@ -230,6 +243,17 @@ export class Agent { // 将完整的响应消息添加到历史(包括工具调用和结果) this.conversationHistory.push(...responseMessages); + // 检查是否需要自动压缩 + if (this.compressionManager.shouldCompress(this.conversationHistory)) { + const result = await this.compressionManager.compress(this.conversationHistory); + if (result.freedTokens > 0) { + this.conversationHistory = result.messages; + if (onStream) { + onStream(`\n[自动压缩: 释放了 ${(result.freedTokens / 1000).toFixed(1)}k tokens]\n`); + } + } + } + // 持久化会话 await this.persistSession(); @@ -294,4 +318,40 @@ export class Agent { }; } } + + /** + * 获取当前上下文使用情况 + */ + getContextUsage(): TokenUsage { + return this.compressionManager.calculateUsage(this.conversationHistory); + } + + /** + * 获取格式化的上下文使用情况(用于 CLI 显示) + */ + getContextUsageFormatted(): string { + return this.compressionManager.formatUsage(this.conversationHistory); + } + + /** + * 获取压缩管理器 + */ + getCompressionManager(): CompressionManager { + return this.compressionManager; + } + + /** + * 手动压缩对话历史(用于 /compact 命令) + */ + async compactHistory(): Promise<{ freedTokens: number; type: string }> { + const result = await this.compressionManager.forceCompress(this.conversationHistory); + if (result.freedTokens > 0) { + this.conversationHistory = result.messages; + await this.persistSession(); + } + return { + freedTokens: result.freedTokens, + type: result.type, + }; + } } diff --git a/src/ui/terminal.ts b/src/ui/terminal.ts index f6c6cdb..d303c88 100644 --- a/src/ui/terminal.ts +++ b/src/ui/terminal.ts @@ -28,14 +28,36 @@ export class TerminalUI { console.log(chalk.cyan('║') + chalk.gray(' Powered by DeepSeek / Claude ') + chalk.cyan('║')); console.log(chalk.cyan('╚════════════════════════════════════════╝\n')); console.log(chalk.gray('输入你的问题,或使用以下命令:')); - console.log(chalk.yellow(' /help') + chalk.gray(' - 显示帮助')); - console.log(chalk.yellow(' /clear') + chalk.gray(' - 清空对话历史')); - console.log(chalk.yellow(' /exit') + chalk.gray(' - 退出程序')); + console.log(chalk.yellow(' /help') + chalk.gray(' - 显示帮助')); + console.log(chalk.yellow(' /clear') + chalk.gray(' - 清空对话历史')); + console.log(chalk.yellow(' /compact') + chalk.gray(' - 压缩对话历史')); + console.log(chalk.yellow(' /context') + chalk.gray(' - 查看上下文使用情况')); + console.log(chalk.yellow(' /exit') + chalk.gray(' - 退出程序')); console.log(''); } + // 格式化上下文使用情况(带颜色) + private formatContextUsage(): string { + const usage = this.agent.getContextUsage(); + const percent = usage.usagePercent; + + // 根据使用率选择颜色 + let colorFn: (text: string) => string; + if (percent < 50) { + colorFn = chalk.green; + } else if (percent < 80) { + colorFn = chalk.yellow; + } else { + colorFn = chalk.red; + } + + const used = usage.input >= 1000 ? `${(usage.input / 1000).toFixed(1)}k` : `${usage.input}`; + const limit = `${(usage.available / 1000).toFixed(0)}k`; + return colorFn(`[${used}/${limit}]`); + } + // 处理特殊命令 - private handleCommand(input: string): boolean { + private async handleCommand(input: string): Promise { const command = input.toLowerCase().trim(); switch (command) { @@ -47,14 +69,52 @@ export class TerminalUI { console.log(chalk.gray(' • 搜索代码')); console.log(chalk.gray(' • 回答编程问题')); console.log(''); + console.log(chalk.white(' 命令:')); + console.log(chalk.gray(' • /help - 显示此帮助')); + console.log(chalk.gray(' • /clear - 清空对话历史')); + console.log(chalk.gray(' • /compact - 压缩对话历史,释放上下文空间')); + console.log(chalk.gray(' • /context - 显示当前上下文使用情况')); + console.log(chalk.gray(' • /exit - 退出程序')); + console.log(''); return true; case '/clear': - // clearHistory 现在是异步的 - void this.agent.clearHistory(); + await this.agent.clearHistory(); console.log(chalk.green('✓ 对话历史已清空\n')); return true; + case '/compact': + console.log(chalk.yellow('正在压缩对话历史...\n')); + try { + const beforeUsage = this.agent.getContextUsage(); + const result = await this.agent.compactHistory(); + const afterUsage = this.agent.getContextUsage(); + + if (result.freedTokens > 0) { + console.log(chalk.green(`✓ 压缩完成!`)); + console.log(chalk.gray(` 策略: ${result.type}`)); + console.log(chalk.gray(` 释放: ${(result.freedTokens / 1000).toFixed(1)}k tokens`)); + console.log(chalk.gray(` 之前: ${(beforeUsage.input / 1000).toFixed(1)}k`)); + console.log(chalk.gray(` 之后: ${(afterUsage.input / 1000).toFixed(1)}k`)); + } else { + console.log(chalk.yellow('没有可压缩的内容')); + } + console.log(''); + } catch (error) { + console.log(chalk.red(`压缩失败: ${error instanceof Error ? error.message : String(error)}\n`)); + } + return true; + + case '/context': + const usage = this.agent.getContextUsage(); + console.log(chalk.cyan('\n📊 上下文使用情况:')); + console.log(chalk.gray(` 已使用: ${(usage.input / 1000).toFixed(1)}k tokens`)); + console.log(chalk.gray(` 可用: ${(usage.available / 1000).toFixed(0)}k tokens`)); + console.log(chalk.gray(` 上下文限制: ${(usage.contextLimit / 1000).toFixed(0)}k tokens`)); + console.log(chalk.gray(` 使用率: ${usage.usagePercent.toFixed(1)}%`)); + console.log(''); + return true; + case '/exit': case '/quit': console.log(chalk.cyan('\n👋 再见!\n')); @@ -74,7 +134,9 @@ export class TerminalUI { return; } - this.rl.question(chalk.green('You > '), (answer) => { + // 显示带上下文使用情况的提示符 + const contextInfo = this.formatContextUsage(); + this.rl.question(`${contextInfo} ${chalk.green('You >')} `, (answer) => { resolve(answer ?? ''); }); }); @@ -95,7 +157,7 @@ export class TerminalUI { // 处理命令 if (input.startsWith('/')) { - if (this.handleCommand(input)) { + if (await this.handleCommand(input)) { continue; } }