feat: 添加对话压缩功能和上下文使用情况显示

- 新增 context 模块，实现 Prune 和 Compaction 两种压缩策略
  - Prune: 将旧工具调用结果替换为占位符
  - Compaction: 使用 AI 生成对话摘要
- CLI 提示符显示上下文使用量 [used/available]
- 添加 /compact 命令手动压缩对话
- 添加 /context 命令查看上下文详情
- Agent 集成自动压缩 (85% 阈值) 和强制压缩功能
2025-12-11 10:59:43 +08:00
parent dddec9b6d5
commit c6f8ba95ec
8 changed files with 955 additions and 9 deletions
@@ -0,0 +1,196 @@
+import { generateText, type ModelMessage, type LanguageModel } from 'ai';
+import { TokenCounter } from './token-counter.js';
+import {
+  SUMMARY_MARKER,
+  type CompressionConfig,
+  DEFAULT_COMPRESSION_CONFIG,
+} from './types.js';
+
+/**
+ * System prompt used when asking the model to summarize the conversation.
+ * The prompt text itself is sent to the model verbatim (in Chinese).
+ */
+const COMPACTION_SYSTEM_PROMPT = `你是一个专门生成对话摘要的助手。你的任务是将对话历史压缩成一个简洁但信息完整的摘要。
+
+摘要应该包含：
+1. 已完成的工作和关键结果
+2. 当前正在进行的任务
+3. 涉及的重要文件和代码
+4. 用户的关键需求和约束
+5. 下一步需要做的事情
+
+要求：
+- 保留关键技术细节（文件路径、函数名、配置等）
+- 使用简洁的列表格式
+- 不要遗漏重要信息
+- 使用中文回复`;
+
+/**
+ * User prompt appended after the history being summarized; it is the final
+ * message of the summarization request built in `compact`.
+ */
+const COMPACTION_USER_PROMPT = `请总结上面的对话。这个摘要将是对话继续时唯一可用的上下文，所以要保留关键信息，包括：完成了什么、正在进行的工作、涉及的文件、下一步计划、以及用户的关键需求或约束。要简洁但足够详细，以便工作可以无缝继续。`;
+
+/**
+ * Reports whether a message is a compaction summary, i.e. whether its text
+ * contains `SUMMARY_MARKER`.
+ *
+ * String content is searched directly. Array content matches when any part
+ * carries a string `text` field containing the marker. Any other content
+ * shape yields `false`.
+ *
+ * @param message Message to inspect.
+ * @returns `true` when the marker is present in the message text.
+ */
+export function isSummaryMessage(message: ModelMessage): boolean {
+  const { content } = message;
+  if (typeof content === 'string') {
+    return content.includes(SUMMARY_MARKER);
+  }
+  if (!Array.isArray(content)) {
+    return false;
+  }
+  return content.some(
+    (part) =>
+      // `typeof null` is 'object' and the `in` operator throws on null, so
+      // null parts must be rejected before probing for `text`.
+      typeof part === 'object' &&
+      part !== null &&
+      'text' in part &&
+      typeof part.text === 'string' &&
+      part.text.includes(SUMMARY_MARKER)
+  );
+}
+
+/**
+ * Wraps summary text in an assistant message delimited by `SUMMARY_MARKER`
+ * on its first and last lines, so `isSummaryMessage` can recognise it later.
+ */
+function createSummaryMessage(summary: string): ModelMessage {
+  const lines = [SUMMARY_MARKER, '## 对话摘要', '', summary, SUMMARY_MARKER];
+  const wrapped: ModelMessage = { role: 'assistant', content: lines.join('\n') };
+  return wrapped;
+}
+
+/**
+ * Compaction strategy: replace older history with an AI-generated summary.
+ *
+ * Steps:
+ * 1. Walk backwards from the newest message and protect messages until the
+ *    `pruneProtect` token budget would be exceeded.
+ * 2. Protect at least 2 messages, or 1 when `pruneProtect` is 0 (forced
+ *    mode), but never more than the history contains.
+ * 3. Never let the protected tail begin with a `tool` message: its tool
+ *    results would be separated from the assistant tool call that produced
+ *    them, so the boundary is moved back until it no longer starts on one.
+ * 4. Send the remaining messages (starting at an existing summary message,
+ *    if there is one) to the model and ask for a summary.
+ * 5. Return `[summary, ...protected tail]`.
+ *
+ * The input array is not mutated. The original `messages` are returned with
+ * `freedTokens: 0` when there is nothing to compact, when the model call
+ * fails or yields an empty summary (both are logged), or when the summary
+ * would not be smaller than the messages it replaces.
+ *
+ * @param messages Conversation history, oldest first.
+ * @param model Language model used to generate the summary.
+ * @param config Compression configuration; only `pruneProtect` is read.
+ * @returns The compacted history and the estimated number of tokens freed.
+ */
+export async function compact(
+  messages: ModelMessage[],
+  model: LanguageModel,
+  config: CompressionConfig = DEFAULT_COMPRESSION_CONFIG
+): Promise<{ messages: ModelMessage[]; freedTokens: number }> {
+  const { pruneProtect } = config;
+
+  // Count how many trailing messages fit inside the protection budget.
+  let protectedTokens = 0;
+  let protectedCount = 0;
+
+  for (let i = messages.length - 1; i >= 0; i--) {
+    const tokens = TokenCounter.estimateMessage(messages[i]);
+    if (protectedTokens + tokens > pruneProtect) {
+      break;
+    }
+    protectedTokens += tokens;
+    protectedCount++;
+  }
+
+  // Enforce the minimum tail size, clamped to the history length.
+  const minProtected = pruneProtect > 0 ? 2 : 1;
+  protectedCount = Math.min(Math.max(protectedCount, minProtected), messages.length);
+
+  // Keep tool results together with the assistant message that called them.
+  while (
+    protectedCount > 0 &&
+    protectedCount < messages.length &&
+    messages[messages.length - protectedCount].role === 'tool'
+  ) {
+    protectedCount++;
+  }
+
+  // Split: messages to summarize vs. messages kept verbatim.
+  const splitIndex = messages.length - protectedCount;
+  const toCompact = messages.slice(0, splitIndex);
+  const toKeep = messages.slice(splitIndex);
+
+  if (toCompact.length === 0) {
+    return { messages, freedTokens: 0 };
+  }
+
+  // An existing summary already covers everything before it, so the request
+  // starts from that summary rather than from the very first message.
+  const existingSummaryIndex = toCompact.findIndex(isSummaryMessage);
+  const messagesForSummary =
+    existingSummaryIndex >= 0 ? toCompact.slice(existingSummaryIndex) : toCompact;
+
+  const beforeTokens = TokenCounter.estimateMessages(toCompact);
+
+  try {
+    const result = await generateText({
+      model,
+      system: COMPACTION_SYSTEM_PROMPT,
+      messages: [
+        ...messagesForSummary,
+        {
+          role: 'user',
+          content: COMPACTION_USER_PROMPT,
+        },
+      ],
+      maxOutputTokens: 2000,
+    });
+
+    // Replacing the history with an empty summary would silently discard it.
+    if (!result.text.trim()) {
+      throw new Error('model returned an empty summary');
+    }
+
+    const summaryMessage = createSummaryMessage(result.text);
+    // Measure both sides with the same estimator so the difference is fair.
+    const afterTokens = TokenCounter.estimateMessages([summaryMessage]);
+    const freedTokens = beforeTokens - afterTokens;
+
+    // A summary that is not smaller than what it replaces is not worth the
+    // loss of detail; keep the original history.
+    if (freedTokens <= 0) {
+      return { messages, freedTokens: 0 };
+    }
+
+    return {
+      messages: [summaryMessage, ...toKeep],
+      freedTokens,
+    };
+  } catch (error) {
+    console.error('生成摘要失败:', error);
+    // On failure the history is returned untouched.
+    return { messages, freedTokens: 0 };
+  }
+}
+
+/**
+ * Simple compaction: drops old messages without calling a model and leaves
+ * a short placeholder summary in their place. Intended for situations where
+ * no model is available or a fast compaction is wanted.
+ *
+ * The newest messages are kept while they fit in the `pruneProtect` token
+ * budget, with a minimum of 2 kept messages (1 when `pruneProtect` is 0).
+ * The kept tail never starts on a `tool` message, so tool results are not
+ * separated from the assistant tool call that produced them.
+ *
+ * The input array is not mutated. The original `messages` are returned with
+ * `freedTokens: 0` when nothing would be removed or when the placeholder
+ * would cost at least as many tokens as the removed messages.
+ *
+ * @param messages Conversation history, oldest first.
+ * @param config Compression configuration; only `pruneProtect` is read.
+ * @returns The truncated history and the estimated net tokens freed.
+ */
+export function simpleCompact(
+  messages: ModelMessage[],
+  config: CompressionConfig = DEFAULT_COMPRESSION_CONFIG
+): { messages: ModelMessage[]; freedTokens: number } {
+  const { pruneProtect } = config;
+
+  // Find the earliest index whose tail still fits in the budget.
+  let keptTokens = 0;
+  let keepFromIndex = messages.length;
+
+  for (let i = messages.length - 1; i >= 0; i--) {
+    const tokens = TokenCounter.estimateMessage(messages[i]);
+    if (keptTokens + tokens > pruneProtect) {
+      break;
+    }
+    keptTokens += tokens;
+    keepFromIndex = i;
+  }
+
+  // Keep at least `minKeep` messages; clamp so short histories never
+  // produce a negative index.
+  const minKeep = pruneProtect > 0 ? 2 : 1;
+  keepFromIndex = Math.max(0, Math.min(keepFromIndex, messages.length - minKeep));
+
+  // Keep tool results together with the assistant message that called them.
+  while (keepFromIndex > 0 && messages[keepFromIndex].role === 'tool') {
+    keepFromIndex--;
+  }
+
+  const removed = messages.slice(0, keepFromIndex);
+  const kept = messages.slice(keepFromIndex);
+
+  if (removed.length === 0) {
+    return { messages, freedTokens: 0 };
+  }
+
+  // Placeholder summary recording how many messages were dropped.
+  const simpleSummary: ModelMessage = {
+    role: 'assistant',
+    content: `${SUMMARY_MARKER}\n[对话历史已压缩，共移除 ${removed.length} 条消息]\n${SUMMARY_MARKER}`,
+  };
+
+  // Net saving: what was removed minus what the placeholder adds.
+  const freedTokens =
+    TokenCounter.estimateMessages(removed) - TokenCounter.estimateMessages([simpleSummary]);
+
+  if (freedTokens <= 0) {
+    return { messages, freedTokens: 0 };
+  }
+
+  return {
+    messages: [simpleSummary, ...kept],
+    freedTokens,
+  };
+}
@@ -0,0 +1,26 @@
+// Type exports
+export type {
+  TokenUsage,
+  CompressionConfig,
+  CompressionContext,
+  CompressionResult,
+} from './types.js';
+
+export {
+  DEFAULT_COMPRESSION_CONFIG,
+  COMPACTED_PLACEHOLDER,
+  SUMMARY_MARKER,
+  COMPACTED_MARKER,
+} from './types.js';
+
+// Token counter
+export { TokenCounter } from './token-counter.js';
+
+// Prune strategy
+export { prune, filterCompacted } from './prune.js';
+
+// Compaction strategy
+export { compact, simpleCompact, isSummaryMessage } from './compaction.js';
+
+// Compression manager (class and shared default instance)
+export { CompressionManager, compressionManager } from './manager.js';
@@ -0,0 +1,238 @@
+import type { ModelMessage, LanguageModel } from 'ai';
+import { TokenCounter } from './token-counter.js';
+import { prune, filterCompacted } from './prune.js';
+import { compact, simpleCompact, isSummaryMessage } from './compaction.js';
+import {
+  type TokenUsage,
+  type CompressionConfig,
+  type CompressionResult,
+  DEFAULT_COMPRESSION_CONFIG,
+} from './types.js';
+
+/**
+ * Compression manager.
+ *
+ * Single entry point for the conversation-context compression strategies:
+ * token usage accounting, `prune` (placeholder out old tool results) and
+ * compaction (summarize or truncate old messages).
+ */
+export class CompressionManager {
+  private config: CompressionConfig;
+  private model: LanguageModel | null = null;
+
+  /**
+   * @param config Overrides merged on top of `DEFAULT_COMPRESSION_CONFIG`.
+   */
+  constructor(config: Partial<CompressionConfig> = {}) {
+    this.config = { ...DEFAULT_COMPRESSION_CONFIG, ...config };
+  }
+
+  /**
+   * Sets the model used to generate summaries. Without a model, compaction
+   * falls back to `simpleCompact`.
+   */
+  setModel(model: LanguageModel): void {
+    this.model = model;
+  }
+
+  /**
+   * Returns a shallow copy of the current configuration.
+   */
+  getConfig(): CompressionConfig {
+    return { ...this.config };
+  }
+
+  /**
+   * Merges the given fields into the current configuration.
+   */
+  updateConfig(config: Partial<CompressionConfig>): void {
+    this.config = { ...this.config, ...config };
+  }
+
+  /**
+   * Estimates token usage for a message array.
+   *
+   * `available` is `contextLimit - outputReserve`. `usagePercent` is capped
+   * at 100; when `available` is not positive (misconfiguration) the context
+   * is reported as fully used instead of dividing by zero or a negative
+   * number.
+   */
+  calculateUsage(messages: ModelMessage[]): TokenUsage {
+    const input = TokenCounter.estimateMessages(messages);
+    const { contextLimit, outputReserve } = this.config;
+    const available = contextLimit - outputReserve;
+    const usagePercent = available > 0 ? Math.min((input / available) * 100, 100) : 100;
+
+    return {
+      input,
+      contextLimit,
+      available,
+      usagePercent,
+    };
+  }
+
+  /**
+   * Whether usage has reached the configured `overflowThreshold`
+   * (a 0-1 fraction of the available space).
+   */
+  shouldCompress(messages: ModelMessage[]): boolean {
+    const usage = this.calculateUsage(messages);
+    return usage.usagePercent >= this.config.overflowThreshold * 100;
+  }
+
+  /**
+   * Whether the estimated input already fills the available space.
+   */
+  isOverflow(messages: ModelMessage[]): boolean {
+    const usage = this.calculateUsage(messages);
+    return usage.input >= usage.available;
+  }
+
+  /**
+   * Runs the prune strategy with the current configuration.
+   */
+  prune(messages: ModelMessage[]): { messages: ModelMessage[]; freedTokens: number } {
+    return prune(messages, this.config);
+  }
+
+  /**
+   * Runs the compaction strategy: AI summary when a model is set, otherwise
+   * the model-free `simpleCompact`.
+   */
+  async compact(messages: ModelMessage[]): Promise<{ messages: ModelMessage[]; freedTokens: number }> {
+    if (this.model) {
+      return compact(messages, this.model, this.config);
+    }
+    return simpleCompact(messages, this.config);
+  }
+
+  /**
+   * Automatic compression: prune first, then compact only if usage is still
+   * at or above the threshold.
+   *
+   * `type` is 'prune' unless compaction freed tokens, in which case it is
+   * 'both' (prune also freed tokens) or 'compaction'.
+   */
+  async compress(messages: ModelMessage[]): Promise<CompressionResult> {
+    let result = [...messages];
+    let totalFreed = 0;
+    let type: CompressionResult['type'] = 'prune';
+
+    // Step 1: prune.
+    const pruneResult = this.prune(result);
+    if (pruneResult.freedTokens > 0) {
+      result = pruneResult.messages;
+      totalFreed += pruneResult.freedTokens;
+    }
+
+    // Step 2: compact only when pruning was not enough.
+    if (this.shouldCompress(result)) {
+      const compactResult = await this.compact(result);
+      if (compactResult.freedTokens > 0) {
+        result = compactResult.messages;
+        totalFreed += compactResult.freedTokens;
+        type = pruneResult.freedTokens > 0 ? 'both' : 'compaction';
+      }
+    }
+
+    return {
+      messages: result,
+      freedTokens: totalFreed,
+      type,
+    };
+  }
+
+  /**
+   * Forced compression (used by the /compact command): compresses regardless
+   * of the usage threshold.
+   *
+   * Histories of 4 messages or fewer are returned unchanged. Otherwise a
+   * prune pass runs with `pruneMinimum: 0` and a reduced protection window,
+   * followed by a compaction pass with `pruneProtect: 0`, which puts the
+   * compaction functions in forced mode (only their minimum tail is kept).
+   */
+  async forceCompress(messages: ModelMessage[]): Promise<CompressionResult> {
+    // Too few messages to be worth compressing.
+    if (messages.length <= 4) {
+      return {
+        messages,
+        freedTokens: 0,
+        type: 'prune',
+      };
+    }
+
+    let result = [...messages];
+    let totalFreed = 0;
+    let type: CompressionResult['type'] = 'prune';
+
+    // Prune with forced settings: no minimum, and protect at most a quarter
+    // of the current history (capped at 10k tokens).
+    const pruneConfig: CompressionConfig = {
+      ...this.config,
+      pruneMinimum: 0,
+      pruneProtect: Math.min(10_000, TokenCounter.estimateMessages(messages) / 4),
+    };
+
+    const pruneResult = prune(result, pruneConfig);
+
+    if (pruneResult.freedTokens > 0) {
+      result = pruneResult.messages;
+      totalFreed += pruneResult.freedTokens;
+    }
+
+    // Forced compaction. The history has more than 4 messages here (prune
+    // never changes the message count), so there is always something older
+    // than the kept tail.
+    const forcedConfig: CompressionConfig = {
+      ...this.config,
+      pruneProtect: 0, // forced mode: no token budget is protected
+    };
+
+    let compactResult: { messages: ModelMessage[]; freedTokens: number };
+    if (this.model) {
+      try {
+        compactResult = await compact(result, this.model, forcedConfig);
+      } catch {
+        // AI compaction threw: fall back to the model-free variant.
+        compactResult = simpleCompact(result, forcedConfig);
+      }
+    } else {
+      compactResult = simpleCompact(result, forcedConfig);
+    }
+
+    if (compactResult.freedTokens > 0) {
+      result = compactResult.messages;
+      totalFreed += compactResult.freedTokens;
+      type = pruneResult.freedTokens > 0 ? 'both' : 'compaction';
+    }
+
+    return {
+      messages: result,
+      freedTokens: totalFreed,
+      type,
+    };
+  }
+
+  /**
+   * Filters already-compacted content (call before sending to the model).
+   */
+  filterCompacted(messages: ModelMessage[]): ModelMessage[] {
+    return filterCompacted(messages);
+  }
+
+  /**
+   * Whether the message is a compaction summary message.
+   */
+  isSummaryMessage(message: ModelMessage): boolean {
+    return isSummaryMessage(message);
+  }
+
+  /**
+   * Formats token usage as `used/available (percent%)` for CLI display.
+   */
+  formatUsage(messages: ModelMessage[]): string {
+    const usage = this.calculateUsage(messages);
+    const used = TokenCounter.format(usage.input);
+    const limit = TokenCounter.format(usage.available);
+    const percent = usage.usagePercent.toFixed(0);
+    return `${used}/${limit} (${percent}%)`;
+  }
+}
+
+// Shared default instance (optional to use); built with the default configuration and no model set.
+export const compressionManager = new CompressionManager();
@@ -0,0 +1,187 @@
+import type { ModelMessage } from 'ai';
+import { TokenCounter } from './token-counter.js';
+import {
+  COMPACTED_PLACEHOLDER,
+  SUMMARY_MARKER,
+  COMPACTED_MARKER,
+  type CompressionConfig,
+  DEFAULT_COMPRESSION_CONFIG,
+} from './types.js';
+
+// Tool-result content part, extended with an optional compaction marker.
+// AI SDK v5 messages carry the payload under `output`; the older shape used
+// `result`. Both are accepted so either message format can be pruned.
+interface CompactedToolResult {
+  type: 'tool-result';
+  toolCallId: string;
+  result?: unknown;
+  output?: unknown;
+  [COMPACTED_MARKER]?: {
+    compactedAt: number;
+    originalSize: number;
+  };
+}
+
+/**
+ * Whether the message text contains `SUMMARY_MARKER` (string content, or a
+ * string `text` field on any part of array content).
+ */
+function isSummaryMessage(message: ModelMessage): boolean {
+  if (typeof message.content === 'string') {
+    return message.content.includes(SUMMARY_MARKER);
+  }
+  if (Array.isArray(message.content)) {
+    return message.content.some(
+      (part) =>
+        typeof part === 'object' &&
+        part !== null &&
+        'text' in part &&
+        typeof (part as { text?: unknown }).text === 'string' &&
+        ((part as { text: string }).text).includes(SUMMARY_MARKER)
+    );
+  }
+  return false;
+}
+
+/**
+ * Whether a content part is a tool result (`type === 'tool-result'`).
+ */
+function isToolResult(part: unknown): part is CompactedToolResult {
+  return (
+    typeof part === 'object' &&
+    part !== null &&
+    (part as { type?: unknown }).type === 'tool-result'
+  );
+}
+
+/**
+ * Whether a tool result already carries the compaction marker.
+ */
+function isCompactedResult(part: unknown): boolean {
+  if (!isToolResult(part)) return false;
+  return COMPACTED_MARKER in part;
+}
+
+/**
+ * Returns the payload of a tool result, preferring `output` and falling
+ * back to `result`.
+ */
+function getToolResultPayload(part: CompactedToolResult): unknown {
+  return 'output' in part ? part.output : part.result;
+}
+
+/**
+ * Estimated token count of a tool result's payload; 0 for anything that is
+ * not a tool result or has no serializable payload.
+ */
+function getToolResultTokens(part: unknown): number {
+  if (!isToolResult(part)) return 0;
+  // JSON.stringify yields undefined for an undefined payload.
+  return TokenCounter.estimateText(JSON.stringify(getToolResultPayload(part)) ?? '');
+}
+
+/**
+ * Returns a copy of the tool result whose payload is the placeholder. The
+ * placeholder is written to whichever field the part actually uses: a text
+ * output object for `output`, the bare string for `result`.
+ */
+function withPlaceholder(part: CompactedToolResult): CompactedToolResult {
+  return 'output' in part
+    ? { ...part, output: { type: 'text', value: COMPACTED_PLACEHOLDER } }
+    : { ...part, result: COMPACTED_PLACEHOLDER };
+}
+
+/**
+ * Compacts one tool result: swaps in the placeholder and records when the
+ * compaction happened and the estimated size of the original payload.
+ */
+function compactToolResult(part: CompactedToolResult): CompactedToolResult {
+  const originalSize = getToolResultTokens(part);
+  return {
+    ...withPlaceholder(part),
+    [COMPACTED_MARKER]: {
+      compactedAt: Date.now(),
+      originalSize,
+    },
+  };
+}
+
+/**
+ * Prune strategy: replace old tool-call results with a placeholder.
+ *
+ * Logic:
+ * 1. Walk the messages from newest to oldest.
+ * 2. Tool results are protected while their running token total stays
+ *    within `pruneProtect`.
+ * 3. Scanning stops at a summary message or at the first tool result that
+ *    is already compacted (everything older was handled by an earlier run).
+ * 4. Tool results beyond the protected range are replaced by a placeholder,
+ *    but only if the total freed is at least `pruneMinimum`.
+ *
+ * The input is never mutated. Only the messages that actually change are
+ * copied; untouched messages are shared by reference with the input, which
+ * also keeps non-JSON values (e.g. binary parts) intact.
+ *
+ * @param messages Conversation history, oldest first.
+ * @param config Compression configuration (`pruneProtect`, `pruneMinimum`).
+ * @returns The processed messages and the estimated tokens freed; the
+ *          original array with `freedTokens: 0` when below `pruneMinimum`.
+ */
+export function prune(
+  messages: ModelMessage[],
+  config: CompressionConfig = DEFAULT_COMPRESSION_CONFIG
+): { messages: ModelMessage[]; freedTokens: number } {
+  const { pruneProtect, pruneMinimum } = config;
+
+  let protectedTokens = 0;
+  let freedTokens = 0;
+  const toPrune: Array<{ msgIndex: number; partIndex: number }> = [];
+
+  // Newest to oldest.
+  scan: for (let msgIndex = messages.length - 1; msgIndex >= 0; msgIndex--) {
+    const message = messages[msgIndex];
+
+    // A summary message marks the start of already-compacted history.
+    if (isSummaryMessage(message)) {
+      break;
+    }
+
+    // Tool results only appear in array content.
+    if (!Array.isArray(message.content)) continue;
+
+    for (let partIndex = message.content.length - 1; partIndex >= 0; partIndex--) {
+      const part: unknown = message.content[partIndex];
+
+      if (!isToolResult(part)) continue;
+
+      // Already compacted: an earlier prune handled this and everything
+      // older, so the whole scan stops here.
+      if (isCompactedResult(part)) {
+        break scan;
+      }
+
+      const tokens = getToolResultTokens(part);
+      protectedTokens += tokens;
+
+      // Past the protected window: mark for compaction.
+      if (protectedTokens > pruneProtect) {
+        toPrune.push({ msgIndex, partIndex });
+        freedTokens += tokens;
+      }
+    }
+  }
+
+  // Not enough to be worth it: leave the history untouched.
+  if (freedTokens < pruneMinimum) {
+    return { messages, freedTokens: 0 };
+  }
+
+  // Copy-on-write: clone only the messages whose content changes.
+  const result = messages.slice();
+  const clonedContent = new Map<number, unknown[]>();
+
+  for (const { msgIndex, partIndex } of toPrune) {
+    let content = clonedContent.get(msgIndex);
+    if (!content) {
+      const source = messages[msgIndex];
+      content = [...(source.content as unknown[])];
+      clonedContent.set(msgIndex, content);
+      result[msgIndex] = { ...source, content } as ModelMessage;
+    }
+    const part = content[partIndex];
+    if (isToolResult(part)) {
+      content[partIndex] = compactToolResult(part);
+    }
+  }
+
+  return { messages: result, freedTokens };
+}
+
+/**
+ * Filters compacted content (for use before sending to the model): every
+ * tool result carrying the compaction marker has its payload set to the
+ * placeholder. Messages with non-array content are returned as-is; the
+ * others are returned as new objects with a new content array.
+ */
+export function filterCompacted(messages: ModelMessage[]): ModelMessage[] {
+  return messages.map((message) => {
+    if (!Array.isArray(message.content)) return message;
+
+    const filteredContent = message.content.map((part) => {
+      if (isCompactedResult(part) && isToolResult(part)) {
+        return withPlaceholder(part);
+      }
+      return part;
+    });
+
+    return {
+      ...message,
+      content: filteredContent,
+    } as ModelMessage;
+  });
+}
@@ -0,0 +1,100 @@
+import type { ModelMessage } from 'ai';
+
+/**
+ * Token counter.
+ *
+ * Character-based estimation with no external dependencies:
+ * - Chinese characters (U+4E00..U+9FFF): about 1.5 characters per token
+ * - everything else: about 4 characters per token
+ * - mixed text is the sum of both contributions, rounded up
+ */
+export class TokenCounter {
+  /**
+   * Estimates the token count of a piece of text (0 for empty text).
+   */
+  static estimateText(text: string): number {
+    if (!text) return 0;
+
+    const cjkCount = text.match(/[\u4e00-\u9fff]/g)?.length ?? 0;
+    const restCount = text.length - cjkCount;
+
+    return Math.ceil(cjkCount / 1.5 + restCount / 4);
+  }
+
+  /**
+   * Estimates the token count of message content: the text itself for
+   * string content, the sum over all parts for array content, 0 otherwise.
+   */
+  static estimateContent(content: ModelMessage['content']): number {
+    if (typeof content === 'string') {
+      return this.estimateText(content);
+    }
+    if (!Array.isArray(content)) {
+      return 0;
+    }
+
+    let sum = 0;
+    for (const part of content) {
+      if (typeof part === 'string') {
+        sum += this.estimateText(part);
+        continue;
+      }
+      if ('text' in part && typeof part.text === 'string') {
+        sum += this.estimateText(part.text);
+        continue;
+      }
+      if ('result' in part) {
+        // tool-result: size of the serialized result
+        sum += this.estimateText(JSON.stringify(part.result));
+        continue;
+      }
+      if ('args' in part) {
+        // tool-call: serialized arguments plus a flat 20 for the tool name etc.
+        sum += this.estimateText(JSON.stringify(part.args)) + 20;
+        continue;
+      }
+      // Any other part: estimate from its full serialization.
+      sum += this.estimateText(JSON.stringify(part));
+    }
+    return sum;
+  }
+
+  /**
+   * Estimates one message: 4 tokens of role overhead plus its content.
+   */
+  static estimateMessage(message: ModelMessage): number {
+    return 4 + this.estimateContent(message.content);
+  }
+
+  /**
+   * Estimates a message array: the sum of all messages plus 3 tokens of
+   * separator overhead per message.
+   */
+  static estimateMessages(messages: ModelMessage[]): number {
+    const contentTotal = messages.reduce(
+      (sum, message) => sum + this.estimateMessage(message),
+      0
+    );
+    return contentTotal + messages.length * 3;
+  }
+
+  /**
+   * Formats a token count for display: one-decimal `k` notation from 1000
+   * upwards, the plain number below that.
+   */
+  static format(tokens: number): string {
+    return tokens >= 1000 ? `${(tokens / 1000).toFixed(1)}k` : `${tokens}`;
+  }
+}
@@ -0,0 +1,77 @@
+import type { LanguageModel } from 'ai';
+
+/**
+ * Token usage statistics.
+ */
+export interface TokenUsage {
+  /** Input tokens (estimated) */
+  input: number;
+  /** Context limit */
+  contextLimit: number;
+  /** Available space (contextLimit - outputReserve) */
+  available: number;
+  /** Usage percentage (0-100) */
+  usagePercent: number;
+}
+
+/**
+ * Compression configuration.
+ */
+export interface CompressionConfig {
+  /** Model context limit (default 200k) */
+  contextLimit: number;
+  /** Tokens reserved for output (default 32k) */
+  outputReserve: number;
+  /** Most recent tokens protected from pruning (default 40k) */
+  pruneProtect: number;
+  /** Minimum amount that must be freed for a prune to run (default 20k) */
+  pruneMinimum: number;
+  /** Overflow threshold; usage above this fraction triggers automatic compression (default 0.85) */
+  overflowThreshold: number;
+}
+
+/**
+ * Default compression configuration.
+ */
+export const DEFAULT_COMPRESSION_CONFIG: CompressionConfig = {
+  contextLimit: 200_000,
+  outputReserve: 32_000,
+  pruneProtect: 40_000,
+  pruneMinimum: 20_000,
+  overflowThreshold: 0.85,
+};
+
+/**
+ * Placeholder that replaces a compacted tool output.
+ */
+export const COMPACTED_PLACEHOLDER = '[此工具输出已压缩]';
+
+/**
+ * Marker string identifying summary messages.
+ */
+export const SUMMARY_MARKER = '__summary__';
+
+/**
+ * Property key marking a tool result as compacted.
+ */
+export const COMPACTED_MARKER = '__compacted__';
+
+/**
+ * Compression context (parameters handed to a compressor).
+ */
+export interface CompressionContext {
+  config: CompressionConfig;
+  model?: LanguageModel;
+}
+
+/**
+ * Compression result.
+ */
+export interface CompressionResult {
+  /** Messages after compression */
+  messages: import('ai').ModelMessage[];
+  /** Tokens freed */
+  freedTokens: number;
+  /** Which strategy (or both) was applied */
+  type: 'prune' | 'compaction' | 'both';
+}
@@ -12,6 +12,11 @@ import type { Tool, ToolResult, Message, AgentConfig, ProviderType } from '../ty
 import { buildZodSchema } from '../types/index.js';
 import { ToolRegistry } from '../tools/registry.js';
 import { SessionManager } from '../session/index.js';
+import {
+  CompressionManager,
+  type TokenUsage,
+  type CompressionConfig,
+} from '../context/index.js';

 // Provider 工厂函数类型
 type ProviderFactory = (apiKey: string) => (model: string) => LanguageModel;
@@ -45,7 +50,10 @@ export class Agent {
  // 会话管理器（可选）
  private sessionManager: SessionManager | null = null;

-  constructor(config: AgentConfig) {
+  // 压缩管理器
+  private compressionManager: CompressionManager;
+
+  constructor(config: AgentConfig, compressionConfig?: Partial<CompressionConfig>) {
    this.config = config;

    const providerFactory = providers[config.provider];
@@ -53,6 +61,11 @@ export class Agent {
      throw new Error(`不支持的 provider: ${config.provider}`);
    }
    this.getModel = providerFactory(config.apiKey);
+
+    // 初始化压缩管理器
+    this.compressionManager = new CompressionManager(compressionConfig);
+    // 设置模型用于生成摘要
+    this.compressionManager.setModel(this.getModel(config.model));
  }

  /**
@@ -230,6 +243,17 @@ export class Agent {
    // 将完整的响应消息添加到历史（包括工具调用和结果）
    this.conversationHistory.push(...responseMessages);

+    // 检查是否需要自动压缩
+    if (this.compressionManager.shouldCompress(this.conversationHistory)) {
+      const result = await this.compressionManager.compress(this.conversationHistory);
+      if (result.freedTokens > 0) {
+        this.conversationHistory = result.messages;
+        if (onStream) {
+          onStream(`\n[自动压缩: 释放了 ${(result.freedTokens / 1000).toFixed(1)}k tokens]\n`);
+        }
+      }
+    }
+
    // 持久化会话
    await this.persistSession();

@@ -294,4 +318,40 @@ export class Agent {
      };
    }
  }
+
+  /**
+   * Returns the current context usage, computed by the compression manager
+   * from the conversation history.
+   */
+  getContextUsage(): TokenUsage {
+    return this.compressionManager.calculateUsage(this.conversationHistory);
+  }
+
+  /**
+   * Returns the current context usage as a formatted string (for CLI display).
+   */
+  getContextUsageFormatted(): string {
+    return this.compressionManager.formatUsage(this.conversationHistory);
+  }
+
+  /**
+   * Returns the compression manager instance owned by this agent.
+   */
+  getCompressionManager(): CompressionManager {
+    return this.compressionManager;
+  }
+
+  /**
+   * Manually compresses the conversation history (used by the /compact
+   * command). The history is replaced and the session persisted only when
+   * the forced compression actually freed tokens.
+   *
+   * @returns The number of tokens freed and the strategy type applied.
+   */
+  async compactHistory(): Promise<{ freedTokens: number; type: string }> {
+    const { messages, freedTokens, type } = await this.compressionManager.forceCompress(
+      this.conversationHistory
+    );
+
+    if (freedTokens > 0) {
+      this.conversationHistory = messages;
+      await this.persistSession();
+    }
+
+    return { freedTokens, type };
+  }
 }
@@ -28,14 +28,36 @@ export class TerminalUI {
    console.log(chalk.cyan('║') + chalk.gray('     Powered by DeepSeek / Claude      ') + chalk.cyan('║'));
    console.log(chalk.cyan('╚════════════════════════════════════════╝\n'));
    console.log(chalk.gray('输入你的问题，或使用以下命令：'));
-    console.log(chalk.yellow('  /help') + chalk.gray('  - 显示帮助'));
-    console.log(chalk.yellow('  /clear') + chalk.gray(' - 清空对话历史'));
-    console.log(chalk.yellow('  /exit') + chalk.gray('  - 退出程序'));
+    console.log(chalk.yellow('  /help') + chalk.gray('    - 显示帮助'));
+    console.log(chalk.yellow('  /clear') + chalk.gray('   - 清空对话历史'));
+    console.log(chalk.yellow('  /compact') + chalk.gray(' - 压缩对话历史'));
+    console.log(chalk.yellow('  /context') + chalk.gray(' - 查看上下文使用情况'));
+    console.log(chalk.yellow('  /exit') + chalk.gray('    - 退出程序'));
    console.log('');
  }

+  // Builds the coloured `[used/available]` context indicator for the prompt:
+  // green below 50% usage, yellow below 80%, red otherwise.
+  private formatContextUsage(): string {
+    const { input, available, usagePercent } = this.agent.getContextUsage();
+
+    const paint: (text: string) => string =
+      usagePercent < 50 ? chalk.green : usagePercent < 80 ? chalk.yellow : chalk.red;
+
+    // Used tokens switch to one-decimal `k` notation from 1000 upwards; the
+    // limit is always shown in whole thousands.
+    const usedLabel = input >= 1000 ? `${(input / 1000).toFixed(1)}k` : `${input}`;
+    const limitLabel = `${(available / 1000).toFixed(0)}k`;
+
+    return paint(`[${usedLabel}/${limitLabel}]`);
+  }
+
  // 处理特殊命令
-  private handleCommand(input: string): boolean {
+  private async handleCommand(input: string): Promise<boolean> {
    const command = input.toLowerCase().trim();

    switch (command) {
@@ -47,14 +69,52 @@ export class TerminalUI {
        console.log(chalk.gray('  • 搜索代码'));
        console.log(chalk.gray('  • 回答编程问题'));
        console.log('');
+        console.log(chalk.white('  命令：'));
+        console.log(chalk.gray('  • /help    - 显示此帮助'));
+        console.log(chalk.gray('  • /clear   - 清空对话历史'));
+        console.log(chalk.gray('  • /compact - 压缩对话历史，释放上下文空间'));
+        console.log(chalk.gray('  • /context - 显示当前上下文使用情况'));
+        console.log(chalk.gray('  • /exit    - 退出程序'));
+        console.log('');
        return true;

      case '/clear':
-        // clearHistory 现在是异步的
-        void this.agent.clearHistory();
+        await this.agent.clearHistory();
        console.log(chalk.green('✓ 对话历史已清空\n'));
        return true;

+      case '/compact':
+        console.log(chalk.yellow('正在压缩对话历史...\n'));
+        try {
+          const beforeUsage = this.agent.getContextUsage();
+          const result = await this.agent.compactHistory();
+          const afterUsage = this.agent.getContextUsage();
+
+          if (result.freedTokens > 0) {
+            console.log(chalk.green(`✓ 压缩完成！`));
+            console.log(chalk.gray(`  策略: ${result.type}`));
+            console.log(chalk.gray(`  释放: ${(result.freedTokens / 1000).toFixed(1)}k tokens`));
+            console.log(chalk.gray(`  之前: ${(beforeUsage.input / 1000).toFixed(1)}k`));
+            console.log(chalk.gray(`  之后: ${(afterUsage.input / 1000).toFixed(1)}k`));
+          } else {
+            console.log(chalk.yellow('没有可压缩的内容'));
+          }
+          console.log('');
+        } catch (error) {
+          console.log(chalk.red(`压缩失败: ${error instanceof Error ? error.message : String(error)}\n`));
+        }
+        return true;
+
+      case '/context':
+        const usage = this.agent.getContextUsage();
+        console.log(chalk.cyan('\n📊 上下文使用情况：'));
+        console.log(chalk.gray(`  已使用: ${(usage.input / 1000).toFixed(1)}k tokens`));
+        console.log(chalk.gray(`  可用: ${(usage.available / 1000).toFixed(0)}k tokens`));
+        console.log(chalk.gray(`  上下文限制: ${(usage.contextLimit / 1000).toFixed(0)}k tokens`));
+        console.log(chalk.gray(`  使用率: ${usage.usagePercent.toFixed(1)}%`));
+        console.log('');
+        return true;
+
      case '/exit':
      case '/quit':
        console.log(chalk.cyan('\n👋 再见！\n'));
@@ -74,7 +134,9 @@ export class TerminalUI {
        return;
      }

-      this.rl.question(chalk.green('You > '), (answer) => {
+      // 显示带上下文使用情况的提示符
+      const contextInfo = this.formatContextUsage();
+      this.rl.question(`${contextInfo} ${chalk.green('You >')} `, (answer) => {
        resolve(answer ?? '');
      });
    });
@@ -95,7 +157,7 @@ export class TerminalUI {

        // 处理命令
        if (input.startsWith('/')) {
-          if (this.handleCommand(input)) {
+          if (await this.handleCommand(input)) {
            continue;
          }
        }