feat: 重构 Vision 处理架构，支持自动委托 Vision Agent

- 主 Agent 收到图片后自动检测是否支持 vision，不支持时委托 Vision Agent 处理
- 添加 qwen-ai-provider-v5 支持百炼/DashScope API
- Task 工具支持 images 参数，可传递图片给子 Agent
- Vision Agent 使用独立的 VisionConfig 配置
- 移除 UI 层的 vision fallback 逻辑，统一在 Agent 层处理
- 删除废弃的 src/utils/vision.ts（原生 fetch 实现）
2025-12-11 18:21:36 +08:00
parent 32fdb244f0
commit abbb03bf50
10 changed files with 289 additions and 308 deletions
@@ -19,6 +19,7 @@
        "inquirer": "^12.0.0",
        "js-yaml": "^4.1.1",
        "ora": "^8.1.0",
        "qwen-ai-provider-v5": "^1.0.2",
        "tree-sitter-bash": "^0.25.1",
        "uuid": "^13.0.0",
        "vscode-jsonrpc": "^8.2.1",
@@ -2594,6 +2595,22 @@
      "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==",
      "license": "MIT"
    },
    "node_modules/qwen-ai-provider-v5": {
      "version": "1.0.2",
      "resolved": "https://registry.npmjs.org/qwen-ai-provider-v5/-/qwen-ai-provider-v5-1.0.2.tgz",
      "integrity": "sha512-IMweAFhHxM2OZzeZKyDUfcxCQCLkFioQv9TkprAXttV6XeTBTSjjUc17S9dUW4rOgtWLsCXoAkaAPUHj1jQYtg==",
      "license": "Apache-2.0",
      "dependencies": {
        "@ai-sdk/provider": "^2.0.0",
        "@ai-sdk/provider-utils": "^3.0.0"
      },
      "engines": {
        "node": ">=18.0.0"
      },
      "peerDependencies": {
        "zod": "^3.25.76 || ^4.1.8"
      }
    },
    "node_modules/resolve-pkg-maps": {
      "version": "1.0.0",
      "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz",
@@ -36,6 +36,7 @@
    "inquirer": "^12.0.0",
    "js-yaml": "^4.1.1",
    "ora": "^8.1.0",
    "qwen-ai-provider-v5": "^1.0.2",
    "tree-sitter-bash": "^0.25.1",
    "uuid": "^13.0.0",
    "vscode-jsonrpc": "^8.2.1",
@@ -1,6 +1,7 @@
 import { createAnthropic } from '@ai-sdk/anthropic';
 import { createDeepSeek } from '@ai-sdk/deepseek';
 import { createOpenAI } from '@ai-sdk/openai';
 import { createQwen } from 'qwen-ai-provider-v5';
 import {
  generateText,
  streamText,
@@ -9,13 +10,14 @@ import {
  type Tool as AITool,
  type LanguageModel,
 } from 'ai';
-import type { Tool, ToolResult, ProviderType, AgentConfig } from '../types/index.js';
+import type { Tool, ToolResult, ProviderType, AgentConfig, ContentBlock } from '../types/index.js';
 import { buildZodSchema } from '../types/index.js';
 import { ToolRegistry } from '../tools/registry.js';
 import type {
  AgentInfo,
  AgentExecutionContext,
  AgentExecutionResult,
  ImageData,
 } from './types.js';
 import { checkBashPermission } from './permission-merger.js';
@@ -28,6 +30,14 @@ interface ProviderOptions {
 // Provider 工厂函数类型
 type ProviderFactory = (options: ProviderOptions) => (model: string) => LanguageModel;
 /**
 * Reports whether `baseUrl` points at Alibaba Cloud Bailian / DashScope.
 *
 * The check is a case-insensitive substring match for "dashscope" anywhere in
 * the URL, so a mixed-case host such as `DashScope.aliyuncs.com` is recognised.
 *
 * @param baseUrl - Configured API base URL; may be omitted.
 * @returns `true` when the URL mentions DashScope; `false` for a missing or
 *   empty URL and for any other endpoint.
 */
 function isDashScopeUrl(baseUrl?: string): boolean {
  if (!baseUrl) return false;
  // Host names are case-insensitive, so normalise before matching.
  return baseUrl.toLowerCase().includes('dashscope');
 }
 // Provider 注册表
 const providers: Record<ProviderType, ProviderFactory> = {
  anthropic: ({ apiKey, baseUrl }) => {
@@ -39,6 +49,11 @@ const providers: Record<ProviderType, ProviderFactory> = {
    return (model) => client(model);
  },
  openai: ({ apiKey, baseUrl }) => {
    // 如果是百炼的 URL，使用 qwen provider
    if (isDashScopeUrl(baseUrl)) {
      const client = createQwen({ apiKey, baseURL: baseUrl });
      return (model) => client(model);
    }
    const client = createOpenAI({ apiKey, baseURL: baseUrl });
    return (model) => client(model);
  },
@@ -79,7 +94,7 @@ export class AgentExecutor {
    prompt: string,
    context: AgentExecutionContext
  ): Promise<AgentExecutionResult> {
-    const { onStream, onToolCall, onToolResult } = context;
+    const { onStream, onToolCall, onToolResult, images } = context;
    // 获取过滤后的工具
    const tools = this.getFilteredTools();
@@ -93,11 +108,14 @@ export class AgentExecutor {
    const maxSteps = this.agentInfo.maxSteps ?? 10;
    const maxTokens = this.agentInfo.model?.maxTokens ?? this.baseConfig.maxTokens;
    // 构建消息内容（支持图片）
    const messageContent = this.buildMessageContent(prompt, images);
    // 构建初始消息
    const messages: ModelMessage[] = [
      {
        role: 'user',
-        content: prompt,
+        content: messageContent,
      },
    ];
@@ -308,4 +326,39 @@ export class AgentExecutor {
    // 否则使用基础配置的 systemPrompt
    return this.baseConfig.systemPrompt;
  }
  /**
   * Assemble the content of the initial user message.
   *
   * @param prompt - Text of the request.
   * @param images - Optional images to attach to the message.
   * @returns The bare prompt string when no images are supplied; otherwise a
   *   list holding one image block per image, followed by a text block when
   *   the prompt is non-empty.
   */
  private buildMessageContent(
    prompt: string,
    images?: ImageData[]
  ): string | ContentBlock[] {
    // Text-only request: no multimodal wrapping needed.
    if (!images?.length) {
      return prompt;
    }

    // Images come first, in their original order.
    const parts: ContentBlock[] = images.map(
      ({ data, mimeType }): ContentBlock => ({
        type: 'image',
        image: data,
        mimeType,
      })
    );

    // The text block is appended last and skipped for an empty prompt.
    if (prompt) {
      parts.push({ type: 'text', text: prompt });
    }

    return parts;
  }
 }
@@ -4,6 +4,7 @@ import { exploreAgent } from './explore.js';
 import { codeReviewerAgent } from './code-reviewer.js';
 import { buildAgent } from './build.js';
 import { planAgent } from './plan.js';
 import { visionAgent } from './vision.js';
 /**
 * 预设 Agent 集合
@@ -14,6 +15,7 @@ export const presetAgents: Record<string, Omit<AgentInfo, 'name'>> = {
  'code-reviewer': codeReviewerAgent,
  build: buildAgent,
  plan: planAgent,
  vision: visionAgent,
 };
 /**
@@ -30,4 +32,4 @@ export function isPresetAgent(name: string): boolean {
  return name in presetAgents;
 }
-export { generalAgent, exploreAgent, codeReviewerAgent, buildAgent, planAgent };
+export { generalAgent, exploreAgent, codeReviewerAgent, buildAgent, planAgent, visionAgent };
@@ -0,0 +1,52 @@
 import type { AgentInfo } from '../types.js';
 /**
 * Vision Agent preset.
 *
 * An image-understanding sub-agent: its system prompt (written in Chinese)
 * asks the model to describe and analyse user-supplied images such as
 * screenshots, design mock-ups and diagrams, and to answer in Markdown.
 */
 export const visionAgent: Omit<AgentInfo, 'name'> = {
  description: '图片理解专家，分析截图、设计稿、架构图等',
  mode: 'subagent',
  // System prompt: analysis checklist, output format and caveats.
  // The template literal is sent verbatim, so nothing may be added inside it.
  prompt: `你是一个专业的图片分析专家。你的任务是详细描述和分析用户提供的图片内容。
 分析要点：
 1. **整体概述**：图片的类型（截图、设计稿、图表、照片等）和主要内容
 2. **布局结构**：页面/图片的整体布局、区域划分
 3. **文字内容**：提取图片中的所有可见文字（完整、准确）
 4. **UI 元素**：按钮、输入框、菜单、图标等元素及其状态
 5. **视觉细节**：颜色、字体、间距、对齐等设计细节
 6. **交互状态**：hover、选中、禁用等状态指示
 7. **潜在问题**：如果用户询问问题，指出可能的问题或改进点
 输出格式：
 - 使用清晰的 Markdown 格式
 - 先给出整体概述，再逐一分析细节
 - 如果是 UI 截图，按区域从上到下、从左到右描述
 - 提取的文字用引号标注
 注意事项：
 - 描述要准确、具体，避免模糊表述
 - 如果某些内容不清晰，明确说明
 - 根据用户的问题重点分析相关部分`,
  // Empty tool list. NOTE(review): `noTask` presumably also withholds the
  // task tool so this agent cannot spawn sub-agents; confirm against AgentInfo.
  tools: {
    enabled: [],
    noTask: true,
  },
  // Every file and git operation is denied and bash is switched off.
  permission: {
    file: {
      read: 'deny',
      write: 'deny',
      edit: 'deny',
      delete: 'deny',
    },
    bash: {
      enabled: false,
    },
    git: {
      read: 'deny',
      write: 'deny',
      dangerous: 'deny',
    },
  },
  // Step budget of one for this agent.
  maxSteps: 1,
 };
@@ -131,6 +131,18 @@ export interface AgentConfigFile {
  agents?: Record<string, Omit<AgentInfo, 'name'>>;
 }
 /**
 * Image payload carried in an agent execution context.
 */
 export interface ImageData {
  /** Base64-encoded image data. */
  data: string;
  /** MIME type of the image. */
  mimeType: string;
  /** File name (optional). */
  filename?: string;
 }
 /**
 * Agent 执行上下文
 */
@@ -139,6 +151,8 @@ export interface AgentExecutionContext {
  parentSessionId?: string;
  /** 工作目录 */
  workdir: string;
  /** 图片数据（用于支持多模态输入） */
  images?: ImageData[];
  /** 回调：输出流 */
  onStream?: (text: string) => void;
  /** 回调：工具调用 */
@@ -1,6 +1,7 @@
 import { createAnthropic } from '@ai-sdk/anthropic';
 import { createDeepSeek } from '@ai-sdk/deepseek';
 import { createOpenAI } from '@ai-sdk/openai';
 import { createQwen } from 'qwen-ai-provider-v5';
 import {
  generateText,
  streamText,
@@ -18,7 +19,9 @@ import {
  type TokenUsage,
  type CompressionConfig,
 } from '../context/index.js';
-import type { AgentInfo } from '../agent/types.js';
+import type { AgentInfo, ImageData } from '../agent/types.js';
 import { agentRegistry, AgentExecutor } from '../agent/index.js';
 import { loadVisionConfig } from '../utils/config.js';
 // Provider 配置
 interface ProviderOptions {
@@ -29,6 +32,14 @@ interface ProviderOptions {
 // Provider 工厂函数类型
 type ProviderFactory = (options: ProviderOptions) => (model: string) => LanguageModel;
 /**
 * Reports whether `baseUrl` points at Alibaba Cloud Bailian / DashScope.
 *
 * The check is a case-insensitive substring match for "dashscope" anywhere in
 * the URL, so a mixed-case host such as `DashScope.aliyuncs.com` is recognised.
 *
 * @param baseUrl - Configured API base URL; may be omitted.
 * @returns `true` when the URL mentions DashScope; `false` for a missing or
 *   empty URL and for any other endpoint.
 */
 function isDashScopeUrl(baseUrl?: string): boolean {
  if (!baseUrl) return false;
  // Host names are case-insensitive, so normalise before matching.
  return baseUrl.toLowerCase().includes('dashscope');
 }
 // Provider 注册表
 const providers: Record<ProviderType, ProviderFactory> = {
  anthropic: ({ apiKey, baseUrl }) => {
@@ -40,6 +51,11 @@ const providers: Record<ProviderType, ProviderFactory> = {
    return (model) => client(model);
  },
  openai: ({ apiKey, baseUrl }) => {
    // 如果是百炼的 URL，使用 qwen provider
    if (isDashScopeUrl(baseUrl)) {
      const client = createQwen({ apiKey, baseURL: baseUrl });
      return (model) => client(model);
    }
    const client = createOpenAI({ apiKey, baseURL: baseUrl });
    return (model) => client(model);
  },
@@ -236,19 +252,42 @@ export class Agent {
   * @param onStream 流式输出回调
   */
  async chat(userMessage: string | UserInput, onStream?: (text: string) => void): Promise<string> {
    // 处理带图片的消息
    let processedMessage = userMessage;
    if (typeof userMessage !== 'string' && userMessage.images && userMessage.images.length > 0) {
      // 检查当前模型是否支持 vision
      if (!this.supportsVision()) {
        // 不支持 vision，尝试使用 Vision Agent 处理图片
        const visionResult = await this.processImagesWithVisionAgent(
          userMessage.images,
          userMessage.text,
          onStream
        );
        if (visionResult) {
          // 成功，将图片分析结果转换为文本消息
          processedMessage = visionResult;
        } else {
          // 失败，返回错误信息
          return '无法处理图片：当前模型不支持图片理解，且 Vision 服务未配置或调用失败。';
        }
      }
    }
    // 构建消息内容
    let messageContent: string | ContentBlock[];
-    if (typeof userMessage === 'string') {
+    if (typeof processedMessage === 'string') {
      // 纯文本消息
-      messageContent = userMessage;
+      messageContent = processedMessage;
    } else {
      // 带图片的消息
      const blocks: ContentBlock[] = [];
      // 添加图片
-      if (userMessage.images && userMessage.images.length > 0) {
+      if (processedMessage.images && processedMessage.images.length > 0) {
-        for (const img of userMessage.images) {
+        for (const img of processedMessage.images) {
          blocks.push({
            type: 'image',
            image: img.data,
@@ -258,10 +297,10 @@ export class Agent {
      }
      // 添加文本
-      if (userMessage.text) {
+      if (processedMessage.text) {
        blocks.push({
          type: 'text',
-          text: userMessage.text,
+          text: processedMessage.text,
        });
      }
@@ -484,6 +523,73 @@ export class Agent {
    return this.currentAgentMode?.name ?? 'default';
  }
  /**
   * Delegate image analysis to the Vision Agent.
   *
   * Used when the main model cannot accept images: the images are handed to
   * the registered `vision` agent, which runs with the provider, API key,
   * model and base URL taken from the separate Vision config. Its answer is
   * folded into a plain-text message.
   *
   * Status and failure notices are reported through `onStream`; the Vision
   * Agent's own output is not streamed.
   *
   * @param images - Images to analyse.
   * @param userText - Text the user sent along with the images, if any.
   * @param onStream - Optional sink for status messages.
   * @returns A text message containing the analysis followed by the user's
   *   question, or `null` when the Vision config, the Vision Agent or the tool
   *   registry is unavailable, or when the analysis reports failure or throws.
   */
  private async processImagesWithVisionAgent(
    images: ImageData[],
    userText?: string,
    onStream?: (text: string) => void
  ): Promise<string | null> {
    // Without a Vision config there is nothing to delegate to.
    const visionConfig = loadVisionConfig();
    if (!visionConfig) {
      onStream?.('\n⚠ Vision 服务未配置，无法处理图片\n');
      return null;
    }

    const visionAgent = agentRegistry.get('vision');
    if (!visionAgent) {
      onStream?.('\n⚠ Vision Agent 未注册\n');
      return null;
    }

    // The registry is passed to the AgentExecutor below.
    if (!this.registry) {
      onStream?.('\n⚠ 工具注册表未初始化\n');
      return null;
    }

    onStream?.(`\n[委托 Vision Agent (${visionConfig.model}) 分析图片...]\n`);

    // Start from the main config and override only the connection fields.
    const visionAgentConfig: AgentConfig = {
      ...this.config,
      provider: visionConfig.provider,
      apiKey: visionConfig.apiKey,
      model: visionConfig.model,
      baseUrl: visionConfig.baseUrl,
    };

    const executor = new AgentExecutor(visionAgent, visionAgentConfig, this.registry);

    // Fallback prompt when the user sent no text; name the count when there
    // is more than one image so the model does not describe only one of them.
    const defaultPrompt =
      images.length > 1
        ? `请详细描述这 ${images.length} 张图片的内容`
        : '请详细描述这张图片的内容';
    const prompt = userText || defaultPrompt;

    try {
      const result = await executor.execute(prompt, {
        workdir: process.cwd(),
        images,
        onStream: undefined, // the Vision Agent's output is not streamed
      });

      if (!result.success) {
        onStream?.(`\n⚠ Vision 分析失败: ${result.error}\n`);
        return null;
      }

      onStream?.('\n[Vision 分析完成]\n');

      // Text-only message for the main model: analysis first, then the question.
      return `[图片分析结果 - 由 ${visionConfig.model} 提供]\n${result.text}\n\n用户问题: ${userText || '(无附加问题)'}`;
    } catch (error) {
      // Honour the documented contract: a thrown error is a failure, i.e. null.
      const reason = error instanceof Error ? error.message : String(error);
      onStream?.(`\n⚠ Vision 分析失败: ${reason}\n`);
      return null;
    }
  }
  /**
   * 检查当前模型是否支持 vision（图片理解）
   */
@@ -1,9 +1,11 @@
 import type { ToolWithMetadata } from '../types.js';
 import type { AgentConfig } from '../../types/index.js';
 import type { ImageData } from '../../agent/types.js';
 import { agentRegistry, AgentExecutor } from '../../agent/index.js';
 import { toolRegistry } from '../registry.js';
 import { SessionManager } from '../../session/index.js';
 import { getAgentManager } from '../../agent/manager.js';
 import { loadVisionConfig } from '../../utils/config.js';
 /**
 * 模型预设映射
@@ -95,6 +97,11 @@ export const taskTool: ToolWithMetadata = {
      description: '是否后台运行。后台运行时立即返回 agentId，使用 agent_output 工具获取结果',
      required: false,
    },
    images: {
      type: 'array',
      description: '图片数据数组（用于 vision 相关任务），每个图片包含 data(base64)、mimeType、filename(可选)',
      required: false,
    },
  },
  metadata: {
    name: 'task',
@@ -110,12 +117,14 @@ export const taskTool: ToolWithMetadata = {
      subagent_type,
      model,
      run_in_background,
      images,
    } = params as {
      description: string;
      prompt: string;
      subagent_type: string;
      model?: string;
      run_in_background?: boolean;
      images?: ImageData[];
    };
    // 检查上下文是否已初始化
@@ -151,7 +160,26 @@ export const taskTool: ToolWithMetadata = {
    // 2. 处理模型选择
    let effectiveConfig = baseConfig;
-    if (model) {
+
    // Vision Agent 特殊处理：使用 VisionConfig 配置
    if (subagent_type === 'vision') {
      const visionConfig = loadVisionConfig();
      if (!visionConfig) {
        return {
          success: false,
          output: '',
          error: 'Vision Agent 需要配置 Vision 服务。请在配置文件中设置 visionProvider、visionApiKey 等参数。',
        };
      }
      // 使用 Vision 配置覆盖 baseConfig
      effectiveConfig = {
        ...baseConfig,
        provider: visionConfig.provider,
        apiKey: visionConfig.apiKey,
        model: visionConfig.model,
        baseUrl: visionConfig.baseUrl,
      };
    } else if (model) {
      const modelName = MODEL_PRESETS[model];
      if (!modelName) {
        return {
@@ -180,6 +208,7 @@ export const taskTool: ToolWithMetadata = {
        {
          parentSessionId,
          workdir: process.cwd(),
          images,
        }
      );
@@ -209,6 +238,7 @@ export const taskTool: ToolWithMetadata = {
    const result = await executor.execute(prompt, {
      parentSessionId,
      workdir: process.cwd(),
      images,
      onStream: undefined, // 子任务不使用流式输出
    });
@@ -12,11 +12,6 @@ import {
  loadImages,
  formatFileSize,
 } from '../utils/image.js';
 import {
  analyzeImages,
  isVisionAvailable,
  getVisionInfo,
 } from '../utils/vision.js';
 import type { UserInput } from '../types/index.js';
 export class TerminalUI {
@@ -340,67 +335,6 @@ export class TerminalUI {
    };
  }
  /**
   * Interactive fallback for image input when the current model has no vision
   * support.
   *
   * Asks the user whether to run the images through the separate Vision
   * service and, if so, turns the analysis into a plain-text message.
   *
   * @param userInput - The user's text and attached images.
   * @returns The combined text (analysis plus the original question), or
   *   `null` when the Vision service is not configured, the user declines,
   *   there are no images, or the analysis fails.
   */
  private async handleNoVisionSupport(
    userInput: UserInput
  ): Promise<string | null> {
    // Bail out with guidance when no Vision service is configured.
    if (!isVisionAvailable()) {
      console.log(chalk.yellow('\n⚠ 当前模型不支持图片理解，且未配置 Vision 服务'));
      console.log(chalk.gray('请在配置文件中设置 visionProvider、visionApiKey 等参数'));
      console.log(chalk.gray('或切换到支持图片理解的模型（如 Claude、GPT-4o）\n'));
      return null;
    }
    const visionInfo = getVisionInfo();
    // Offer the choice: analyse via the Vision service, or cancel this input.
    console.log(chalk.yellow('\n⚠ 当前模型不支持图片理解'));
    console.log(chalk.gray('请选择处理方式:'));
    console.log(chalk.white(`  1. 使用 Vision 服务 (${visionInfo.model}) 分析图片后继续对话`));
    console.log(chalk.white('  2. 取消本次输入'));
    // Wrap the callback-style readline question in a promise.
    const choice = await new Promise<string>((resolve) => {
      this.rl.question(chalk.green('选择 (1/2): '), resolve);
    });
    // Anything other than "1" counts as cancel.
    if (choice.trim() !== '1') {
      console.log(chalk.gray('已取消\n'));
      return null;
    }
    // Analyse the images with the Vision service.
    console.log(chalk.cyan(`\n正在使用 ${visionInfo.model} 分析图片...`));
    const images = userInput.images || [];
    if (images.length === 0) {
      console.log(chalk.red('没有图片需要分析\n'));
      return null;
    }
    // Pass only the fields the Vision API needs; empty text becomes undefined.
    const result = await analyzeImages(
      images.map(img => ({
        data: img.data,
        mimeType: img.mimeType,
        filename: img.filename,
      })),
      userInput.text || undefined
    );
    if (!result.success) {
      console.log(chalk.red(`\n图片分析失败: ${result.error}\n`));
      return null;
    }
    console.log(chalk.green('✓ 图片分析完成\n'));
    // Build the text message that carries the image description.
    const combinedText = `[Vision 服务分析结果]\n${result.description}\n\n用户原始问题: ${userInput.text}`;
    return combinedText;
  }
  // 提问并获取用户输入
  private prompt(): Promise<string> {
    return new Promise((resolve, reject) => {
@@ -507,20 +441,9 @@ export class TerminalUI {
          continue;
        }
-        let { userInput, hasImages } = processed;
+        const { userInput, hasImages } = processed;
-        // 如果有图片且当前模型不支持 vision
+        // 发送给 AI（如果模型不支持 vision，Agent 会自动委托 Vision Agent 处理）
        if (hasImages && !this.agent.supportsVision()) {
          const fallbackText = await this.handleNoVisionSupport(userInput);
          if (!fallbackText) {
            continue;
          }
          // 使用 Vision 分析结果替代图片
          userInput = { text: fallbackText };
          hasImages = false;
        }
        // 发送给 AI
        process.stdout.write(chalk.gray('思考中...'));
        try {
@@ -1,217 +0,0 @@
 import { loadVisionConfig, type VisionConfig } from './config.js';
 /**
 * Vision 服务 - 用于图片理解
 * 当主模型不支持 vision 时，使用独立的 Vision 服务分析图片
 * 使用原生 fetch 调用 OpenAI 兼容接口，以确保与百炼等服务兼容
 */
 /** Image payload passed to the Vision service. */
 export interface ImageData {
  /** Base64-encoded image data. */
  data: string;
  /** MIME type of the image. */
  mimeType: string;
  /** File name (optional). */
  filename?: string;
 }
 /** Outcome of a Vision analysis request. */
 export interface VisionAnalysisResult {
  /** Whether the analysis succeeded. */
  success: boolean;
  /** Description of the image(s); an empty string on failure. */
  description: string;
  /** Error message (set on failure). */
  error?: string;
 }
 /**
 * Analyse a single image.
 *
 * Thin wrapper over `analyzeImages` with a one-element batch: the same
 * configuration check, API call and error mapping apply.
 *
 * @param image - The image to analyse.
 * @param prompt - Optional question or instruction for the model.
 * @returns The description on success, or a failure result carrying the
 *   error message.
 */
 export async function analyzeImage(
  image: ImageData,
  prompt?: string
 ): Promise<VisionAnalysisResult> {
  return analyzeImages([image], prompt);
 }
 /**
 * Analyse a batch of images in one Vision request.
 *
 * @param images - Images to analyse; an empty list is reported as a failure.
 * @param prompt - Optional question or instruction for the model.
 * @returns The description on success. Missing configuration, an empty batch
 *   and any error thrown by the API call all yield a failure result with an
 *   empty description and the error message.
 */
 export async function analyzeImages(
  images: ImageData[],
  prompt?: string
 ): Promise<VisionAnalysisResult> {
  // Single place that shapes every failure result.
  const failed = (error: string): VisionAnalysisResult => ({
    success: false,
    description: '',
    error,
  });

  const visionConfig = loadVisionConfig();
  if (!visionConfig) {
    return failed('未配置 Vision 服务。请在配置文件中设置 visionProvider、visionApiKey 等参数。');
  }

  if (images.length === 0) {
    return failed('没有提供图片');
  }

  try {
    const description = await callVisionAPI(visionConfig, images, prompt);
    return { success: true, description };
  } catch (err) {
    return failed(err instanceof Error ? err.message : String(err));
  }
 }
 /**
 * Send one chat-completions request to an OpenAI-compatible Vision endpoint.
 *
 * Uses plain `fetch`. Images are sent as `data:` URLs, followed by one text
 * part holding the user's prompt or a default "describe the image(s)" prompt.
 *
 * @param config - Vision configuration (provider, API key, model, base URL).
 * @param images - Images to include in the request.
 * @param userPrompt - Optional prompt; a default is used when empty.
 * @returns The text of the first choice in the response.
 * @throws Error when the provider is not `openai`, when the HTTP response is
 *   not OK, or when the response contains no text.
 */
 async function callVisionAPI(
  config: VisionConfig,
  images: ImageData[],
  userPrompt?: string
 ): Promise<string> {
  // Only the OpenAI-compatible wire format is implemented.
  if (config.provider !== 'openai') {
    throw new Error(`暂不支持 ${config.provider} 的 Vision 服务`);
  }

  type ContentPart =
    | { type: 'text'; text: string }
    | { type: 'image_url'; image_url: { url: string } };

  const fallbackPrompt =
    images.length === 1
      ? '请详细描述这张图片的内容，包括主要元素、文字、颜色、布局等信息。'
      : `请详细描述这 ${images.length} 张图片的内容，包括主要元素、文字、颜色、布局等信息。`;

  // Image parts first (as data URLs), the text part last.
  const parts: ContentPart[] = [
    ...images.map(
      ({ mimeType, data }): ContentPart => ({
        type: 'image_url',
        image_url: { url: `data:${mimeType};base64,${data}` },
      })
    ),
    { type: 'text', text: userPrompt || fallbackPrompt },
  ];

  // Drop one trailing slash so the joined endpoint has no double slash.
  const root = (config.baseUrl || 'https://api.openai.com/v1').replace(/\/$/, '');

  const response = await fetch(`${root}/chat/completions`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${config.apiKey}`,
    },
    body: JSON.stringify({
      model: config.model,
      messages: [{ role: 'user', content: parts }],
      max_tokens: 2000,
    }),
  });

  if (!response.ok) {
    const raw = await response.text();
    let message = `API 请求失败: ${response.status} ${response.statusText}`;
    try {
      // Prefer the server's own error message when the body is JSON.
      const parsed = JSON.parse(raw);
      if (parsed.error?.message) {
        message = parsed.error.message;
      }
    } catch {
      // Not usable as JSON: append the raw body, if there is one.
      if (raw) {
        message += ` - ${raw}`;
      }
    }
    throw new Error(message);
  }

  const payload = (await response.json()) as {
    choices?: Array<{
      message?: {
        content?: string;
      };
    }>;
  };

  const answer = payload.choices?.[0]?.message?.content;
  if (!answer) {
    throw new Error('API 返回了空响应');
  }
  return answer;
 }
 /**
 * Tell whether a Vision service is configured.
 *
 * @returns `true` when `loadVisionConfig()` yields anything other than `null`.
 */
 export function isVisionAvailable(): boolean {
  return loadVisionConfig() !== null;
 }
 /**
 * Summarise the Vision configuration for display.
 *
 * @returns `{ available: false }` when no configuration is loaded; otherwise
 *   `available: true` together with the configured provider and model.
 */
 export function getVisionInfo(): { available: boolean; provider?: string; model?: string } {
  const visionConfig = loadVisionConfig();
  return visionConfig
    ? {
        available: true,
        provider: visionConfig.provider,
        model: visionConfig.model,
      }
    : { available: false };
 }