feat(context): 优化对话压缩系统

- 添加独立摘要模型配置支持(SUMMARY_PROVIDER/MODEL/API_KEY/BASE_URL)
- 添加 CompressionStatus 枚举和 DetailedCompressionResult 详细返回类型
- 实现压缩失败检测(空摘要、token膨胀)
- 添加首条 user-assistant 对保护,确保上下文连贯性
- CompressionManager 支持独立摘要模型(优先使用小模型降低成本)
- Agent 自动压缩时显示详细状态信息
- 更新相关测试用例
This commit is contained in:
2025-12-13 11:13:20 +08:00
parent 9ff2934089
commit f54f24b079
10 changed files with 495 additions and 102 deletions
+180 -27
View File
@@ -2,7 +2,9 @@ import { generateText, type ModelMessage, type LanguageModel } from 'ai';
import { TokenCounter } from './token-counter.js';
import {
SUMMARY_MARKER,
CompressionStatus,
type CompressionConfig,
type DetailedCompressionResult,
DEFAULT_COMPRESSION_CONFIG,
} from './types.js';
@@ -58,6 +60,90 @@ function createSummaryMessage(summary: string): ModelMessage {
};
}
/**
 * Decides whether a generated summary may replace the messages it condenses.
 *
 * The summary is rejected when it is empty or whitespace-only, or when its
 * estimated token count is not strictly smaller than `originalTokens`.
 *
 * @param summary Text returned by the summarization call.
 * @param originalTokens Estimated token count of the content being replaced.
 * @returns The verdict, the matching status code, and the estimated token
 *          count of the summary (0 when the summary is empty).
 */
function validateSummary(
  summary: string,
  originalTokens: number
): { valid: boolean; status: CompressionStatus; summaryTokens: number } {
  // Guard: nothing usable came back from the model.
  const isBlank = !summary || summary.trim().length === 0;
  if (isBlank) {
    return { valid: false, status: CompressionStatus.FAILED_EMPTY_SUMMARY, summaryTokens: 0 };
  }

  // A summary that is as large as (or larger than) its source saves nothing.
  const summaryTokens = TokenCounter.estimateText(summary);
  const inflated = summaryTokens >= originalTokens;
  const status = inflated
    ? CompressionStatus.FAILED_TOKEN_INFLATED
    : CompressionStatus.SUCCESS;

  return { valid: !inflated, status, summaryTokens };
}
/**
 * Splits a message list right after its first user → assistant exchange.
 *
 * The head (`firstPair`) runs from the very start of `messages` through the
 * first `assistant` message that appears after the first `user` message, so
 * anything located before that user message, or between the two, stays in
 * the head as well. When no assistant message follows, the head ends at the
 * first user message instead.
 *
 * @param messages Messages to split.
 * @returns `{ firstPair, rest }`, or `null` when `messages` contains no
 *          `user` message.
 */
function findFirstUserAssistantPair(messages: ModelMessage[]): {
  firstPair: ModelMessage[];
  rest: ModelMessage[];
} | null {
  const userAt = messages.findIndex((message) => message.role === 'user');
  if (userAt < 0) {
    return null;
  }

  // First assistant message located strictly after the first user message.
  const assistantAt = messages.findIndex(
    (message, position) => position > userAt && message.role === 'assistant'
  );

  // Cut after the assistant reply when there is one, otherwise after the user message.
  const headEnd = (assistantAt < 0 ? userAt : assistantAt) + 1;

  return {
    firstPair: messages.slice(0, headEnd),
    rest: messages.slice(headEnd),
  };
}
/**
* Options accepted by the compaction functions.
*/
export interface CompactOptions {
/**
* Whether to keep the first user-assistant pair out of the range that gets
* summarized. The compaction functions in this file treat an omitted value as true.
*/
protectFirstPair?: boolean;
}
/**
* Compaction 策略:使用 AI 生成对话摘要
*
@@ -70,21 +156,36 @@ function createSummaryMessage(summary: string): ModelMessage {
* @param messages 消息数组
* @param model 语言模型
* @param config 压缩配置
* @returns 压缩后的消息数组和释放的 tokens
* @param options 压缩选项
* @returns 详细压缩结果
*/
export async function compact(
messages: ModelMessage[],
model: LanguageModel,
config: CompressionConfig = DEFAULT_COMPRESSION_CONFIG
): Promise<{ messages: ModelMessage[]; freedTokens: number }> {
config: CompressionConfig = DEFAULT_COMPRESSION_CONFIG,
options: CompactOptions = {}
): Promise<DetailedCompressionResult> {
const { pruneProtect } = config;
const { protectFirstPair = true } = options;
// 计算需要保护的消息数量
// 首条消息保护:分离首个 user-assistant 对
let protectedFirst: ModelMessage[] = [];
let compressibleMessages = messages;
if (protectFirstPair) {
const firstPairResult = findFirstUserAssistantPair(messages);
if (firstPairResult && firstPairResult.rest.length > 0) {
protectedFirst = firstPairResult.firstPair;
compressibleMessages = firstPairResult.rest;
}
}
// 计算需要保护的消息数量(从可压缩部分的末尾算起)
let protectedTokens = 0;
let protectedCount = 0;
for (let i = messages.length - 1; i >= 0; i--) {
const tokens = TokenCounter.estimateMessage(messages[i]);
for (let i = compressibleMessages.length - 1; i >= 0; i--) {
const tokens = TokenCounter.estimateMessage(compressibleMessages[i]);
if (protectedTokens + tokens > pruneProtect) {
break;
}
@@ -101,12 +202,17 @@ export async function compact(
}
// 分割消息:需要压缩的部分 vs 保护的部分
const toCompact = messages.slice(0, messages.length - protectedCount);
const toKeep = messages.slice(messages.length - protectedCount);
const toCompact = compressibleMessages.slice(0, compressibleMessages.length - protectedCount);
const toKeep = compressibleMessages.slice(compressibleMessages.length - protectedCount);
// 如果没有需要压缩的消息,直接返回
if (toCompact.length === 0) {
return { messages, freedTokens: 0 };
return {
messages,
freedTokens: 0,
type: 'none',
status: CompressionStatus.NOOP,
};
}
// 检查是否已有摘要消息
@@ -115,7 +221,7 @@ export async function compact(
existingSummaryIndex >= 0 ? toCompact.slice(existingSummaryIndex) : toCompact;
// 计算压缩前的 tokens
const beforeTokens = TokenCounter.estimateMessages(toCompact);
const originalTokens = TokenCounter.estimateMessages(toCompact);
try {
// 调用 AI 生成摘要
@@ -132,18 +238,44 @@ export async function compact(
maxOutputTokens: 2000,
});
const summaryMessage = createSummaryMessage(result.text);
const afterTokens = TokenCounter.estimateMessage(summaryMessage);
// 验证摘要结果
const validation = validateSummary(result.text, originalTokens);
// 返回:摘要 + 保护的消息
if (!validation.valid) {
console.warn(`摘要验证失败: ${validation.status}`);
return {
messages,
freedTokens: 0,
type: 'none',
status: validation.status,
originalTokens,
summaryTokens: validation.summaryTokens,
};
}
const summaryMessage = createSummaryMessage(result.text);
const summaryTokens = TokenCounter.estimateMessage(summaryMessage);
const freedTokens = originalTokens - summaryTokens;
// 返回:首条保护 + 摘要 + 末尾保护的消息
return {
messages: [summaryMessage, ...toKeep],
freedTokens: beforeTokens - afterTokens,
messages: [...protectedFirst, summaryMessage, ...toKeep],
freedTokens,
type: 'compaction',
status: CompressionStatus.SUCCESS,
originalTokens,
summaryTokens,
};
} catch (error) {
console.error('生成摘要失败:', error);
// 失败时返回原消息
return { messages, freedTokens: 0 };
return {
messages,
freedTokens: 0,
type: 'none',
status: CompressionStatus.FAILED_ERROR,
error: error instanceof Error ? error.message : String(error),
originalTokens,
};
}
}
@@ -153,16 +285,30 @@ export async function compact(
*/
export function simpleCompact(
messages: ModelMessage[],
config: CompressionConfig = DEFAULT_COMPRESSION_CONFIG
): { messages: ModelMessage[]; freedTokens: number } {
config: CompressionConfig = DEFAULT_COMPRESSION_CONFIG,
options: CompactOptions = {}
): DetailedCompressionResult {
const { pruneProtect } = config;
const { protectFirstPair = true } = options;
// 首条消息保护:分离首个 user-assistant 对
let protectedFirst: ModelMessage[] = [];
let compressibleMessages = messages;
if (protectFirstPair) {
const firstPairResult = findFirstUserAssistantPair(messages);
if (firstPairResult && firstPairResult.rest.length > 0) {
protectedFirst = firstPairResult.firstPair;
compressibleMessages = firstPairResult.rest;
}
}
// 计算需要保留的消息
let keptTokens = 0;
let keepFromIndex = messages.length;
let keepFromIndex = compressibleMessages.length;
for (let i = messages.length - 1; i >= 0; i--) {
const tokens = TokenCounter.estimateMessage(messages[i]);
for (let i = compressibleMessages.length - 1; i >= 0; i--) {
const tokens = TokenCounter.estimateMessage(compressibleMessages[i]);
if (keptTokens + tokens > pruneProtect) {
break;
}
@@ -172,13 +318,18 @@ export function simpleCompact(
// 确保至少保留最后 N 条消息(强制模式下保留 1 条,否则保留 2 条)
const minKeep = pruneProtect > 0 ? 2 : 1;
keepFromIndex = Math.min(keepFromIndex, messages.length - minKeep);
keepFromIndex = Math.min(keepFromIndex, compressibleMessages.length - minKeep);
const removed = messages.slice(0, keepFromIndex);
const kept = messages.slice(keepFromIndex);
const removed = compressibleMessages.slice(0, keepFromIndex);
const kept = compressibleMessages.slice(keepFromIndex);
if (removed.length === 0) {
return { messages, freedTokens: 0 };
return {
messages,
freedTokens: 0,
type: 'none',
status: CompressionStatus.NOOP,
};
}
// 创建简单摘要
@@ -190,7 +341,9 @@ export function simpleCompact(
const freedTokens = TokenCounter.estimateMessages(removed);
return {
messages: [simpleSummary, ...kept],
messages: [...protectedFirst, simpleSummary, ...kept],
freedTokens,
type: 'compaction',
status: CompressionStatus.SUCCESS,
};
}
+3 -1
View File
@@ -4,6 +4,7 @@ export type {
CompressionConfig,
CompressionContext,
CompressionResult,
DetailedCompressionResult,
} from './types.js';
export {
@@ -11,6 +12,7 @@ export {
COMPACTED_PLACEHOLDER,
SUMMARY_MARKER,
COMPACTED_MARKER,
CompressionStatus,
} from './types.js';
// Token 计数器
@@ -20,7 +22,7 @@ export { TokenCounter } from './token-counter.js';
export { prune, filterCompacted } from './prune.js';
// Compaction 策略
export { compact, simpleCompact, isSummaryMessage } from './compaction.js';
export { compact, simpleCompact, isSummaryMessage, type CompactOptions } from './compaction.js';
// 压缩管理器
export { CompressionManager, compressionManager } from './manager.js';
+128 -51
View File
@@ -1,11 +1,12 @@
import type { ModelMessage, LanguageModel } from 'ai';
import { TokenCounter } from './token-counter.js';
import { prune, filterCompacted } from './prune.js';
import { compact, simpleCompact, isSummaryMessage } from './compaction.js';
import { compact, simpleCompact, isSummaryMessage, type CompactOptions } from './compaction.js';
import {
type TokenUsage,
type CompressionConfig,
type CompressionResult,
type DetailedCompressionResult,
CompressionStatus,
DEFAULT_COMPRESSION_CONFIG,
} from './types.js';
@@ -15,19 +16,55 @@ import {
*/
export class CompressionManager {
/** Effective compression configuration (defaults merged with constructor overrides). */
private config: CompressionConfig;
/** Main model (fallback for the summary model). */
private model: LanguageModel | null = null;
/** Dedicated summary model (a small model is recommended to reduce cost). */
private summaryModel: LanguageModel | null = null;
/** Whether to protect the first user-assistant pair. */
private protectFirstPair: boolean = true;
/**
* Creates a manager whose configuration is DEFAULT_COMPRESSION_CONFIG with any
* fields present in `config` overriding the defaults.
*
* @param config Partial configuration overrides; defaults to no overrides.
*/
constructor(config: Partial<CompressionConfig> = {}) {
this.config = { ...DEFAULT_COMPRESSION_CONFIG, ...config };
}
/**
* Sets the main model. For summary generation it serves only as the fallback
* when no dedicated summary model has been set.
*
* @param model Language model to store as the fallback summarizer.
*/
setModel(model: LanguageModel): void {
this.model = model;
}
/**
* Sets the dedicated summary model, which takes priority over the main model
* when a summary has to be generated.
*
* @param model Language model to use for summaries.
*/
setSummaryModel(model: LanguageModel): void {
this.summaryModel = model;
}
/**
 * Resolves the model to use for summary generation.
 *
 * @returns The dedicated summary model when one has been set, otherwise the
 *          main model (which may itself be `null`).
 */
private getSummaryModel(): LanguageModel | null {
  const dedicated = this.summaryModel;
  if (dedicated !== null && dedicated !== undefined) {
    return dedicated;
  }
  return this.model;
}
/**
* Sets whether the first user-assistant pair is protected during compaction.
*
* @param protect `true` to protect the pair, `false` to let it be compacted.
*/
setProtectFirstPair(protect: boolean): void {
this.protectFirstPair = protect;
}
/**
 * Builds the options object handed to the compaction functions.
 *
 * @returns Options carrying the manager's current `protectFirstPair` setting.
 */
private getCompactOptions(): CompactOptions {
  const { protectFirstPair } = this;
  return { protectFirstPair };
}
/**
* 获取当前配置
*/
@@ -85,37 +122,70 @@ export class CompressionManager {
/**
* 执行 compaction 策略
*/
async compact(messages: ModelMessage[]): Promise<{ messages: ModelMessage[]; freedTokens: number }> {
if (this.model) {
return compact(messages, this.model, this.config);
async compact(messages: ModelMessage[]): Promise<DetailedCompressionResult> {
const summaryModel = this.getSummaryModel();
if (summaryModel) {
return compact(messages, summaryModel, this.config, this.getCompactOptions());
}
// 没有模型时使用简单压缩
return simpleCompact(messages, this.config);
return simpleCompact(messages, this.config, this.getCompactOptions());
}
/**
* 自动压缩:先 prune,不够再 compact
*/
async compress(messages: ModelMessage[]): Promise<CompressionResult> {
async compress(messages: ModelMessage[]): Promise<DetailedCompressionResult> {
// 检查是否需要压缩
if (!this.shouldCompress(messages)) {
return {
messages,
freedTokens: 0,
type: 'none',
status: CompressionStatus.NOOP,
};
}
let result = [...messages];
let totalFreed = 0;
let type: CompressionResult['type'] = 'prune';
let type: DetailedCompressionResult['type'] = 'none';
// 第一步:尝试 prune
const pruneResult = this.prune(result);
if (pruneResult.freedTokens > 0) {
result = pruneResult.messages;
totalFreed += pruneResult.freedTokens;
type = 'prune';
}
// 检查是否还需要进一步压缩
if (this.shouldCompress(result)) {
// 第二步:执行 compaction
const compactResult = await this.compact(result);
if (compactResult.freedTokens > 0) {
if (compactResult.status === CompressionStatus.SUCCESS && compactResult.freedTokens > 0) {
result = compactResult.messages;
totalFreed += compactResult.freedTokens;
type = pruneResult.freedTokens > 0 ? 'both' : 'compaction';
return {
messages: result,
freedTokens: totalFreed,
type,
status: CompressionStatus.SUCCESS,
originalTokens: compactResult.originalTokens,
summaryTokens: compactResult.summaryTokens,
};
}
// compaction 失败,返回失败状态
if (compactResult.status !== CompressionStatus.NOOP) {
return {
messages, // 返回原消息
freedTokens: 0,
type: 'none',
status: compactResult.status,
error: compactResult.error,
};
}
}
@@ -123,6 +193,7 @@ export class CompressionManager {
messages: result,
freedTokens: totalFreed,
type,
status: totalFreed > 0 ? CompressionStatus.SUCCESS : CompressionStatus.NOOP,
};
}
@@ -130,19 +201,20 @@ export class CompressionManager {
* 强制压缩(用于 /compact 命令)
* 无论是否达到阈值都执行压缩
*/
async forceCompress(messages: ModelMessage[]): Promise<CompressionResult> {
async forceCompress(messages: ModelMessage[]): Promise<DetailedCompressionResult> {
// 消息数量太少时不压缩(至少需要 4 条消息)
if (messages.length <= 4) {
return {
messages,
freedTokens: 0,
type: 'prune',
type: 'none',
status: CompressionStatus.NOOP,
};
}
let result = [...messages];
let totalFreed = 0;
let type: CompressionResult['type'] = 'prune';
let type: DetailedCompressionResult['type'] = 'none';
// 先尝试 prune(使用强制配置)
const pruneConfig: CompressionConfig = {
@@ -156,55 +228,60 @@ export class CompressionManager {
if (pruneResult.freedTokens > 0) {
result = pruneResult.messages;
totalFreed += pruneResult.freedTokens;
type = 'prune';
}
// 强制 compaction只保留最后 2 条消息
// 计算保留消息的 tokens
const keepCount = Math.min(2, result.length - 1);
const toKeep = result.slice(-keepCount);
const toCompact = result.slice(0, result.length - keepCount);
// 强制 compaction使用强制配置
const summaryModel = this.getSummaryModel();
const forceConfig: CompressionConfig = {
...this.config,
pruneProtect: 0, // 强制模式:不保护任何 tokens
};
// 强制模式不保护首条消息对
const forceOptions = { protectFirstPair: false };
if (toCompact.length > 0) {
if (this.model) {
try {
const compactResult = await compact(result, this.model, {
...this.config,
pruneProtect: 0, // 强制模式:不保护任何 tokens
});
if (compactResult.freedTokens > 0) {
result = compactResult.messages;
totalFreed += compactResult.freedTokens;
type = pruneResult.freedTokens > 0 ? 'both' : 'compaction';
}
} catch {
// AI 压缩失败,使用简单压缩
const compactResult = simpleCompact(result, {
...this.config,
pruneProtect: 0,
});
if (compactResult.freedTokens > 0) {
result = compactResult.messages;
totalFreed += compactResult.freedTokens;
type = pruneResult.freedTokens > 0 ? 'both' : 'compaction';
}
}
} else {
const compactResult = simpleCompact(result, {
...this.config,
pruneProtect: 0,
});
if (compactResult.freedTokens > 0) {
result = compactResult.messages;
totalFreed += compactResult.freedTokens;
type = pruneResult.freedTokens > 0 ? 'both' : 'compaction';
if (summaryModel) {
const compactResult = await compact(result, summaryModel, forceConfig, forceOptions);
if (compactResult.status === CompressionStatus.SUCCESS && compactResult.freedTokens > 0) {
result = compactResult.messages;
totalFreed += compactResult.freedTokens;
type = type === 'prune' ? 'both' : 'compaction';
return {
messages: result,
freedTokens: totalFreed,
type,
status: CompressionStatus.SUCCESS,
originalTokens: compactResult.originalTokens,
summaryTokens: compactResult.summaryTokens,
};
}
// AI 压缩失败,回退到简单压缩
if (compactResult.status !== CompressionStatus.NOOP) {
const simpleResult = simpleCompact(result, forceConfig, forceOptions);
if (simpleResult.freedTokens > 0) {
result = simpleResult.messages;
totalFreed += simpleResult.freedTokens;
type = type === 'prune' ? 'both' : 'compaction';
}
}
} else {
// 没有模型,使用简单压缩
const simpleResult = simpleCompact(result, forceConfig, forceOptions);
if (simpleResult.freedTokens > 0) {
result = simpleResult.messages;
totalFreed += simpleResult.freedTokens;
type = type === 'prune' ? 'both' : 'compaction';
}
}
return {
messages: result,
freedTokens: totalFreed,
type,
status: totalFreed > 0 ? CompressionStatus.SUCCESS : CompressionStatus.NOOP,
};
}
+37 -1
View File
@@ -65,7 +65,7 @@ export interface CompressionContext {
}
/**
* 压缩结果
* 压缩结果(基础)
*/
export interface CompressionResult {
/** 压缩后的消息 */
@@ -75,3 +75,39 @@ export interface CompressionResult {
/** 压缩类型 */
type: 'prune' | 'compaction' | 'both';
}
/**
* Status codes describing the outcome of a compression attempt.
*/
export enum CompressionStatus {
/** Compression succeeded. */
SUCCESS = 'success',
/** Nothing was compressed: the threshold was not reached or there was nothing to compress. */
NOOP = 'noop',
/** Failure: the generated summary was empty. */
FAILED_EMPTY_SUMMARY = 'failed_empty_summary',
/** Failure: token inflation (the summary did not reduce the token count). */
FAILED_TOKEN_INFLATED = 'failed_token_inflated',
/** Failure: any other error. */
FAILED_ERROR = 'failed_error',
}
/**
* Detailed compression result: the resulting messages plus status and token accounting.
*/
export interface DetailedCompressionResult {
/** Messages after compression. */
messages: import('ai').ModelMessage[];
/** Tokens freed (positive = success; 0 or negative = failure). No-op results also report 0. */
freedTokens: number;
/** Compression type; 'none' is used when no strategy changed the messages. */
type: 'prune' | 'compaction' | 'both' | 'none';
/** Detailed status. */
status: CompressionStatus;
/** Error message (set on failure). */
error?: string;
/** Original token count (before compression). */
originalTokens?: number;
/** Summary token count (after compression). */
summaryTokens?: number;
}