feat: 重构 Vision 处理架构，支持自动委托 Vision Agent

- 主 Agent 收到图片后自动检测是否支持 vision，不支持时委托 Vision Agent 处理
- 添加 qwen-ai-provider-v5 支持百炼/DashScope API
- Task 工具支持 images 参数，可传递图片给子 Agent
- Vision Agent 使用独立的 VisionConfig 配置
- 移除 UI 层的 vision fallback 逻辑，统一在 Agent 层处理
- 删除废弃的 src/utils/vision.ts（原生 fetch 实现）
2025-12-11 18:21:36 +08:00
parent 32fdb244f0
commit abbb03bf50
10 changed files with 289 additions and 308 deletions
@@ -19,6 +19,7 @@
        "inquirer": "^12.0.0",
        "js-yaml": "^4.1.1",
        "ora": "^8.1.0",
+        "qwen-ai-provider-v5": "^1.0.2",
        "tree-sitter-bash": "^0.25.1",
        "uuid": "^13.0.0",
        "vscode-jsonrpc": "^8.2.1",
@@ -2594,6 +2595,22 @@
      "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==",
      "license": "MIT"
    },
+    "node_modules/qwen-ai-provider-v5": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/qwen-ai-provider-v5/-/qwen-ai-provider-v5-1.0.2.tgz",
+      "integrity": "sha512-IMweAFhHxM2OZzeZKyDUfcxCQCLkFioQv9TkprAXttV6XeTBTSjjUc17S9dUW4rOgtWLsCXoAkaAPUHj1jQYtg==",
+      "license": "Apache-2.0",
+      "dependencies": {
+        "@ai-sdk/provider": "^2.0.0",
+        "@ai-sdk/provider-utils": "^3.0.0"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      },
+      "peerDependencies": {
+        "zod": "^3.25.76 || ^4.1.8"
+      }
+    },
    "node_modules/resolve-pkg-maps": {
      "version": "1.0.0",
      "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz",
@@ -36,6 +36,7 @@
    "inquirer": "^12.0.0",
    "js-yaml": "^4.1.1",
    "ora": "^8.1.0",
+    "qwen-ai-provider-v5": "^1.0.2",
    "tree-sitter-bash": "^0.25.1",
    "uuid": "^13.0.0",
    "vscode-jsonrpc": "^8.2.1",
@@ -1,6 +1,7 @@
 import { createAnthropic } from '@ai-sdk/anthropic';
 import { createDeepSeek } from '@ai-sdk/deepseek';
 import { createOpenAI } from '@ai-sdk/openai';
+import { createQwen } from 'qwen-ai-provider-v5';
 import {
  generateText,
  streamText,
@@ -9,13 +10,14 @@ import {
  type Tool as AITool,
  type LanguageModel,
 } from 'ai';
-import type { Tool, ToolResult, ProviderType, AgentConfig } from '../types/index.js';
+import type { Tool, ToolResult, ProviderType, AgentConfig, ContentBlock } from '../types/index.js';
 import { buildZodSchema } from '../types/index.js';
 import { ToolRegistry } from '../tools/registry.js';
 import type {
  AgentInfo,
  AgentExecutionContext,
  AgentExecutionResult,
+  ImageData,
 } from './types.js';
 import { checkBashPermission } from './permission-merger.js';

@@ -28,6 +30,14 @@ interface ProviderOptions {
 // Provider 工厂函数类型
 type ProviderFactory = (options: ProviderOptions) => (model: string) => LanguageModel;

+/**
+ * Reports whether `baseUrl` refers to Alibaba Cloud Bailian / DashScope.
+ *
+ * The check is a plain, case-sensitive substring test for `dashscope`
+ * anywhere in the URL string. A missing or empty URL yields `false`.
+ */
+function isDashScopeUrl(baseUrl?: string): boolean {
+  return baseUrl ? baseUrl.includes('dashscope') : false;
+}
+
 // Provider 注册表
 const providers: Record<ProviderType, ProviderFactory> = {
  anthropic: ({ apiKey, baseUrl }) => {
@@ -39,6 +49,11 @@ const providers: Record<ProviderType, ProviderFactory> = {
    return (model) => client(model);
  },
  openai: ({ apiKey, baseUrl }) => {
+    // 如果是百炼的 URL，使用 qwen provider
+    if (isDashScopeUrl(baseUrl)) {
+      const client = createQwen({ apiKey, baseURL: baseUrl });
+      return (model) => client(model);
+    }
    const client = createOpenAI({ apiKey, baseURL: baseUrl });
    return (model) => client(model);
  },
@@ -79,7 +94,7 @@ export class AgentExecutor {
    prompt: string,
    context: AgentExecutionContext
  ): Promise<AgentExecutionResult> {
-    const { onStream, onToolCall, onToolResult } = context;
+    const { onStream, onToolCall, onToolResult, images } = context;

    // 获取过滤后的工具
    const tools = this.getFilteredTools();
@@ -93,11 +108,14 @@ export class AgentExecutor {
    const maxSteps = this.agentInfo.maxSteps ?? 10;
    const maxTokens = this.agentInfo.model?.maxTokens ?? this.baseConfig.maxTokens;

+    // 构建消息内容（支持图片）
+    const messageContent = this.buildMessageContent(prompt, images);
+
    // 构建初始消息
    const messages: ModelMessage[] = [
      {
        role: 'user',
-        content: prompt,
+        content: messageContent,
      },
    ];

@@ -308,4 +326,39 @@ export class AgentExecutor {
    // 否则使用基础配置的 systemPrompt
    return this.baseConfig.systemPrompt;
  }
+
+  /**
+   * Builds the content of the initial user message, optionally with images.
+   *
+   * @param prompt Text prompt for the agent.
+   * @param images Optional images to attach to the message.
+   * @returns `prompt` unchanged when no images are supplied; otherwise an
+   *   array holding one image block per image (in input order) followed by a
+   *   text block for `prompt`. The text block is omitted when `prompt` is empty.
+   */
+  private buildMessageContent(
+    prompt: string,
+    images?: ImageData[]
+  ): string | ContentBlock[] {
+    // Text-only path: hand the prompt back untouched.
+    if (!images?.length) {
+      return prompt;
+    }
+
+    // Image blocks come first, in the order they were supplied.
+    const blocks: ContentBlock[] = images.map(
+      (img): ContentBlock => ({
+        type: 'image',
+        image: img.data,
+        mimeType: img.mimeType,
+      })
+    );
+
+    // The text block goes last and is skipped for an empty prompt.
+    if (prompt) {
+      blocks.push({ type: 'text', text: prompt });
+    }
+
+    return blocks;
+  }
 }
@@ -4,6 +4,7 @@ import { exploreAgent } from './explore.js';
 import { codeReviewerAgent } from './code-reviewer.js';
 import { buildAgent } from './build.js';
 import { planAgent } from './plan.js';
+import { visionAgent } from './vision.js';

 /**
 * 预设 Agent 集合
@@ -14,6 +15,7 @@ export const presetAgents: Record<string, Omit<AgentInfo, 'name'>> = {
  'code-reviewer': codeReviewerAgent,
  build: buildAgent,
  plan: planAgent,
+  vision: visionAgent,
 };

 /**
@@ -30,4 +32,4 @@ export function isPresetAgent(name: string): boolean {
  return name in presetAgents;
 }

-export { generalAgent, exploreAgent, codeReviewerAgent, buildAgent, planAgent };
+export { generalAgent, exploreAgent, codeReviewerAgent, buildAgent, planAgent, visionAgent };
@@ -0,0 +1,52 @@
+import type { AgentInfo } from '../types.js';
+
+/**
+ * Vision Agent preset.
+ *
+ * An image-understanding specialist intended to analyze image content
+ * (screenshots, design mockups, architecture diagrams, ...) with a
+ * multimodal model.
+ *
+ * Configuration summary:
+ * - `mode` is `'subagent'`.
+ * - `prompt` is the Chinese-language instruction text sent to the model; it
+ *   asks for an overview, layout, visible text, UI elements, visual details,
+ *   interaction states and potential problems, formatted as Markdown.
+ * - `tools.enabled` is empty and `tools.noTask` is `true`
+ *   (NOTE(review): presumably `noTask` keeps this agent from spawning
+ *   further sub-tasks; confirm against the tool filter).
+ * - All file and git permissions are `'deny'` and bash is disabled.
+ * - `maxSteps` is 1.
+ */
+export const visionAgent: Omit<AgentInfo, 'name'> = {
+  description: '图片理解专家，分析截图、设计稿、架构图等',
+  mode: 'subagent',
+  prompt: `你是一个专业的图片分析专家。你的任务是详细描述和分析用户提供的图片内容。
+
+分析要点：
+1. **整体概述**：图片的类型（截图、设计稿、图表、照片等）和主要内容
+2. **布局结构**：页面/图片的整体布局、区域划分
+3. **文字内容**：提取图片中的所有可见文字（完整、准确）
+4. **UI 元素**：按钮、输入框、菜单、图标等元素及其状态
+5. **视觉细节**：颜色、字体、间距、对齐等设计细节
+6. **交互状态**：hover、选中、禁用等状态指示
+7. **潜在问题**：如果用户询问问题，指出可能的问题或改进点
+
+输出格式：
+- 使用清晰的 Markdown 格式
+- 先给出整体概述，再逐一分析细节
+- 如果是 UI 截图，按区域从上到下、从左到右描述
+- 提取的文字用引号标注
+
+注意事项：
+- 描述要准确、具体，避免模糊表述
+- 如果某些内容不清晰，明确说明
+- 根据用户的问题重点分析相关部分`,
+  tools: {
+    enabled: [],
+    noTask: true,
+  },
+  permission: {
+    file: {
+      read: 'deny',
+      write: 'deny',
+      edit: 'deny',
+      delete: 'deny',
+    },
+    bash: {
+      enabled: false,
+    },
+    git: {
+      read: 'deny',
+      write: 'deny',
+      dangerous: 'deny',
+    },
+  },
+  maxSteps: 1,
+};
@@ -131,6 +131,18 @@ export interface AgentConfigFile {
  agents?: Record<string, Omit<AgentInfo, 'name'>>;
 }

+/**
+ * Image data carried in the agent execution context.
+ */
+export interface ImageData {
+  /** Base64-encoded image data. */
+  data: string;
+  /** MIME type of the image. */
+  mimeType: string;
+  /** File name (optional). */
+  filename?: string;
+}
+
 /**
 * Agent 执行上下文
 */
@@ -139,6 +151,8 @@ export interface AgentExecutionContext {
  parentSessionId?: string;
  /** 工作目录 */
  workdir: string;
+  /** 图片数据（用于支持多模态输入） */
+  images?: ImageData[];
  /** 回调：输出流 */
  onStream?: (text: string) => void;
  /** 回调：工具调用 */
@@ -1,6 +1,7 @@
 import { createAnthropic } from '@ai-sdk/anthropic';
 import { createDeepSeek } from '@ai-sdk/deepseek';
 import { createOpenAI } from '@ai-sdk/openai';
+import { createQwen } from 'qwen-ai-provider-v5';
 import {
  generateText,
  streamText,
@@ -18,7 +19,9 @@ import {
  type TokenUsage,
  type CompressionConfig,
 } from '../context/index.js';
-import type { AgentInfo } from '../agent/types.js';
+import type { AgentInfo, ImageData } from '../agent/types.js';
+import { agentRegistry, AgentExecutor } from '../agent/index.js';
+import { loadVisionConfig } from '../utils/config.js';

 // Provider 配置
 interface ProviderOptions {
@@ -29,6 +32,14 @@ interface ProviderOptions {
 // Provider 工厂函数类型
 type ProviderFactory = (options: ProviderOptions) => (model: string) => LanguageModel;

+/**
+ * Reports whether `baseUrl` refers to Alibaba Cloud Bailian / DashScope.
+ *
+ * The check is a plain, case-sensitive substring test for `dashscope`
+ * anywhere in the URL string. A missing or empty URL yields `false`.
+ */
+function isDashScopeUrl(baseUrl?: string): boolean {
+  return baseUrl ? baseUrl.includes('dashscope') : false;
+}
+
 // Provider 注册表
 const providers: Record<ProviderType, ProviderFactory> = {
  anthropic: ({ apiKey, baseUrl }) => {
@@ -40,6 +51,11 @@ const providers: Record<ProviderType, ProviderFactory> = {
    return (model) => client(model);
  },
  openai: ({ apiKey, baseUrl }) => {
+    // 如果是百炼的 URL，使用 qwen provider
+    if (isDashScopeUrl(baseUrl)) {
+      const client = createQwen({ apiKey, baseURL: baseUrl });
+      return (model) => client(model);
+    }
    const client = createOpenAI({ apiKey, baseURL: baseUrl });
    return (model) => client(model);
  },
@@ -236,19 +252,42 @@ export class Agent {
   * @param onStream 流式输出回调
   */
  async chat(userMessage: string | UserInput, onStream?: (text: string) => void): Promise<string> {
+    // 处理带图片的消息
+    let processedMessage = userMessage;
+
+    if (typeof userMessage !== 'string' && userMessage.images && userMessage.images.length > 0) {
+      // 检查当前模型是否支持 vision
+      if (!this.supportsVision()) {
+        // 不支持 vision，尝试使用 Vision Agent 处理图片
+        const visionResult = await this.processImagesWithVisionAgent(
+          userMessage.images,
+          userMessage.text,
+          onStream
+        );
+
+        if (visionResult) {
+          // 成功，将图片分析结果转换为文本消息
+          processedMessage = visionResult;
+        } else {
+          // 失败，返回错误信息
+          return '无法处理图片：当前模型不支持图片理解，且 Vision 服务未配置或调用失败。';
+        }
+      }
+    }
+
    // 构建消息内容
    let messageContent: string | ContentBlock[];

-    if (typeof userMessage === 'string') {
+    if (typeof processedMessage === 'string') {
      // 纯文本消息
-      messageContent = userMessage;
+      messageContent = processedMessage;
    } else {
      // 带图片的消息
      const blocks: ContentBlock[] = [];

      // 添加图片
-      if (userMessage.images && userMessage.images.length > 0) {
-        for (const img of userMessage.images) {
+      if (processedMessage.images && processedMessage.images.length > 0) {
+        for (const img of processedMessage.images) {
          blocks.push({
            type: 'image',
            image: img.data,
@@ -258,10 +297,10 @@ export class Agent {
      }

      // 添加文本
-      if (userMessage.text) {
+      if (processedMessage.text) {
        blocks.push({
          type: 'text',
-          text: userMessage.text,
+          text: processedMessage.text,
        });
      }

@@ -484,6 +523,73 @@ export class Agent {
    return this.currentAgentMode?.name ?? 'default';
  }

+  /**
+   * Delegates image analysis to the Vision Agent.
+   *
+   * Called when the main model does not support vision. The images are sent
+   * to the `vision` agent, which runs with the provider, API key, model and
+   * base URL taken from the Vision config (all other settings are inherited
+   * from this agent's config). Progress and failure notices are reported
+   * through `onStream`; the Vision Agent's own output is not streamed.
+   *
+   * @param images Images to analyze.
+   * @param userText Optional user question; a default "describe this image"
+   *   prompt is used when it is empty.
+   * @param onStream Optional callback for status messages.
+   * @returns A text message containing the analysis result and the user's
+   *   question, or `null` on any failure: Vision config missing, Vision Agent
+   *   not registered, tool registry not initialised, an unsuccessful result,
+   *   or an error thrown while creating or running the executor.
+   */
+  private async processImagesWithVisionAgent(
+    images: ImageData[],
+    userText?: string,
+    onStream?: (text: string) => void
+  ): Promise<string | null> {
+    // The Vision service needs its own configuration.
+    const visionConfig = loadVisionConfig();
+    if (!visionConfig) {
+      onStream?.('\n⚠ Vision 服务未配置，无法处理图片\n');
+      return null;
+    }
+
+    // Look up the Vision Agent definition.
+    const visionAgent = agentRegistry.get('vision');
+    if (!visionAgent) {
+      onStream?.('\n⚠ Vision Agent 未注册\n');
+      return null;
+    }
+
+    // The executor requires a tool registry; capture it in a local so the
+    // null check stays valid for the rest of the method.
+    const registry = this.registry;
+    if (!registry) {
+      onStream?.('\n⚠ 工具注册表未初始化\n');
+      return null;
+    }
+
+    onStream?.(`\n[委托 Vision Agent (${visionConfig.model}) 分析图片...]\n`);
+
+    // Start from the current config and override the model-related fields.
+    const visionAgentConfig: AgentConfig = {
+      ...this.config,
+      provider: visionConfig.provider,
+      apiKey: visionConfig.apiKey,
+      model: visionConfig.model,
+      baseUrl: visionConfig.baseUrl,
+    };
+
+    // Fall back to a generic description request when the user gave no text.
+    const prompt = userText || '请详细描述这张图片的内容';
+
+    try {
+      // Executor creation and execution are both guarded: a thrown error
+      // must surface as a `null` result, not as a rejected chat() call.
+      const executor = new AgentExecutor(visionAgent, visionAgentConfig, registry);
+
+      const result = await executor.execute(prompt, {
+        workdir: process.cwd(),
+        images,
+        onStream: undefined, // The Vision Agent does not stream its output.
+      });
+
+      if (!result.success) {
+        onStream?.(`\n⚠ Vision 分析失败: ${result.error}\n`);
+        return null;
+      }
+
+      onStream?.('\n[Vision 分析完成]\n');
+
+      // Build the text message that replaces the original image message.
+      const combinedText = `[图片分析结果 - 由 ${visionConfig.model} 提供]\n${result.text}\n\n用户问题: ${userText || '(无附加问题)'}`;
+
+      return combinedText;
+    } catch (error) {
+      const reason = error instanceof Error ? error.message : String(error);
+      onStream?.(`\n⚠ Vision 分析失败: ${reason}\n`);
+      return null;
+    }
+  }
+
+
  /**
   * 检查当前模型是否支持 vision（图片理解）
   */
@@ -1,9 +1,11 @@
 import type { ToolWithMetadata } from '../types.js';
 import type { AgentConfig } from '../../types/index.js';
+import type { ImageData } from '../../agent/types.js';
 import { agentRegistry, AgentExecutor } from '../../agent/index.js';
 import { toolRegistry } from '../registry.js';
 import { SessionManager } from '../../session/index.js';
 import { getAgentManager } from '../../agent/manager.js';
+import { loadVisionConfig } from '../../utils/config.js';

 /**
 * 模型预设映射
@@ -95,6 +97,11 @@ export const taskTool: ToolWithMetadata = {
      description: '是否后台运行。后台运行时立即返回 agentId，使用 agent_output 工具获取结果',
      required: false,
    },
+    images: {
+      type: 'array',
+      description: '图片数据数组（用于 vision 相关任务），每个图片包含 data(base64)、mimeType、filename(可选)',
+      required: false,
+    },
  },
  metadata: {
    name: 'task',
@@ -110,12 +117,14 @@ export const taskTool: ToolWithMetadata = {
      subagent_type,
      model,
      run_in_background,
+      images,
    } = params as {
      description: string;
      prompt: string;
      subagent_type: string;
      model?: string;
      run_in_background?: boolean;
+      images?: ImageData[];
    };

    // 检查上下文是否已初始化
@@ -151,7 +160,26 @@ export const taskTool: ToolWithMetadata = {

    // 2. 处理模型选择
    let effectiveConfig = baseConfig;
-    if (model) {
+
+    // Vision Agent 特殊处理：使用 VisionConfig 配置
+    if (subagent_type === 'vision') {
+      const visionConfig = loadVisionConfig();
+      if (!visionConfig) {
+        return {
+          success: false,
+          output: '',
+          error: 'Vision Agent 需要配置 Vision 服务。请在配置文件中设置 visionProvider、visionApiKey 等参数。',
+        };
+      }
+      // 使用 Vision 配置覆盖 baseConfig
+      effectiveConfig = {
+        ...baseConfig,
+        provider: visionConfig.provider,
+        apiKey: visionConfig.apiKey,
+        model: visionConfig.model,
+        baseUrl: visionConfig.baseUrl,
+      };
+    } else if (model) {
      const modelName = MODEL_PRESETS[model];
      if (!modelName) {
        return {
@@ -180,6 +208,7 @@ export const taskTool: ToolWithMetadata = {
        {
          parentSessionId,
          workdir: process.cwd(),
+          images,
        }
      );

@@ -209,6 +238,7 @@ export const taskTool: ToolWithMetadata = {
    const result = await executor.execute(prompt, {
      parentSessionId,
      workdir: process.cwd(),
+      images,
      onStream: undefined, // 子任务不使用流式输出
    });

@@ -12,11 +12,6 @@ import {
  loadImages,
  formatFileSize,
 } from '../utils/image.js';
-import {
-  analyzeImages,
-  isVisionAvailable,
-  getVisionInfo,
-} from '../utils/vision.js';
 import type { UserInput } from '../types/index.js';

 export class TerminalUI {
@@ -340,67 +335,6 @@ export class TerminalUI {
    };
  }

-  // 处理不支持 Vision 的情况
-  private async handleNoVisionSupport(
-    userInput: UserInput
-  ): Promise<string | null> {
-    // 检查 Vision 服务是否可用
-    if (!isVisionAvailable()) {
-      console.log(chalk.yellow('\n⚠ 当前模型不支持图片理解，且未配置 Vision 服务'));
-      console.log(chalk.gray('请在配置文件中设置 visionProvider、visionApiKey 等参数'));
-      console.log(chalk.gray('或切换到支持图片理解的模型（如 Claude、GPT-4o）\n'));
-      return null;
-    }
-
-    const visionInfo = getVisionInfo();
-
-    // 提示用户选择
-    console.log(chalk.yellow('\n⚠ 当前模型不支持图片理解'));
-    console.log(chalk.gray('请选择处理方式:'));
-    console.log(chalk.white(`  1. 使用 Vision 服务 (${visionInfo.model}) 分析图片后继续对话`));
-    console.log(chalk.white('  2. 取消本次输入'));
-
-    const choice = await new Promise<string>((resolve) => {
-      this.rl.question(chalk.green('选择 (1/2): '), resolve);
-    });
-
-    if (choice.trim() !== '1') {
-      console.log(chalk.gray('已取消\n'));
-      return null;
-    }
-
-    // 使用 Vision 服务分析图片
-    console.log(chalk.cyan(`\n正在使用 ${visionInfo.model} 分析图片...`));
-
-    const images = userInput.images || [];
-    if (images.length === 0) {
-      console.log(chalk.red('没有图片需要分析\n'));
-      return null;
-    }
-
-    // 调用 Vision API 分析图片
-    const result = await analyzeImages(
-      images.map(img => ({
-        data: img.data,
-        mimeType: img.mimeType,
-        filename: img.filename,
-      })),
-      userInput.text || undefined
-    );
-
-    if (!result.success) {
-      console.log(chalk.red(`\n图片分析失败: ${result.error}\n`));
-      return null;
-    }
-
-    console.log(chalk.green('✓ 图片分析完成\n'));
-
-    // 构建带图片描述的文本消息
-    const combinedText = `[Vision 服务分析结果]\n${result.description}\n\n用户原始问题: ${userInput.text}`;
-
-    return combinedText;
-  }
-
  // 提问并获取用户输入
  private prompt(): Promise<string> {
    return new Promise((resolve, reject) => {
@@ -507,20 +441,9 @@ export class TerminalUI {
          continue;
        }

-        let { userInput, hasImages } = processed;
+        const { userInput, hasImages } = processed;

-        // 如果有图片且当前模型不支持 vision
-        if (hasImages && !this.agent.supportsVision()) {
-          const fallbackText = await this.handleNoVisionSupport(userInput);
-          if (!fallbackText) {
-            continue;
-          }
-          // 使用 Vision 分析结果替代图片
-          userInput = { text: fallbackText };
-          hasImages = false;
-        }
-
-        // 发送给 AI
+        // 发送给 AI（如果模型不支持 vision，Agent 会自动委托 Vision Agent 处理）
        process.stdout.write(chalk.gray('思考中...'));

        try {
@@ -1,217 +0,0 @@
-import { loadVisionConfig, type VisionConfig } from './config.js';
-
-/**
- * Vision 服务 - 用于图片理解
- * 当主模型不支持 vision 时，使用独立的 Vision 服务分析图片
- * 使用原生 fetch 调用 OpenAI 兼容接口，以确保与百炼等服务兼容
- */
-
-export interface ImageData {
-  /** base64 编码的图片数据 */
-  data: string;
-  /** MIME 类型 */
-  mimeType: string;
-  /** 文件名（可选） */
-  filename?: string;
-}
-
-export interface VisionAnalysisResult {
-  success: boolean;
-  /** 图片描述 */
-  description: string;
-  /** 错误信息（如果失败） */
-  error?: string;
-}
-
-/**
- * 分析单张图片
- */
-export async function analyzeImage(
-  image: ImageData,
-  prompt?: string
-): Promise<VisionAnalysisResult> {
-  const config = loadVisionConfig();
-
-  if (!config) {
-    return {
-      success: false,
-      description: '',
-      error: '未配置 Vision 服务。请在配置文件中设置 visionProvider、visionApiKey 等参数。',
-    };
-  }
-
-  try {
-    const description = await callVisionAPI(config, [image], prompt);
-    return {
-      success: true,
-      description,
-    };
-  } catch (error) {
-    return {
-      success: false,
-      description: '',
-      error: error instanceof Error ? error.message : String(error),
-    };
-  }
-}
-
-/**
- * 批量分析图片
- */
-export async function analyzeImages(
-  images: ImageData[],
-  prompt?: string
-): Promise<VisionAnalysisResult> {
-  const config = loadVisionConfig();
-
-  if (!config) {
-    return {
-      success: false,
-      description: '',
-      error: '未配置 Vision 服务。请在配置文件中设置 visionProvider、visionApiKey 等参数。',
-    };
-  }
-
-  if (images.length === 0) {
-    return {
-      success: false,
-      description: '',
-      error: '没有提供图片',
-    };
-  }
-
-  try {
-    const description = await callVisionAPI(config, images, prompt);
-    return {
-      success: true,
-      description,
-    };
-  } catch (error) {
-    return {
-      success: false,
-      description: '',
-      error: error instanceof Error ? error.message : String(error),
-    };
-  }
-}
-
-/**
- * 调用 Vision API
- * 使用原生 fetch 调用 OpenAI 兼容接口，确保与百炼等服务兼容
- */
-async function callVisionAPI(
-  config: VisionConfig,
-  images: ImageData[],
-  userPrompt?: string
-): Promise<string> {
-  // 目前只支持 OpenAI 兼容的 Vision API（如百炼的 qwen-vl-plus）
-  if (config.provider !== 'openai') {
-    throw new Error(`暂不支持 ${config.provider} 的 Vision 服务`);
-  }
-
-  // 构建消息内容（OpenAI Vision API 格式）
-  const content: Array<
-    | { type: 'text'; text: string }
-    | { type: 'image_url'; image_url: { url: string } }
-  > = [];
-
-  // 添加图片（使用 data URL 格式）
-  for (const img of images) {
-    content.push({
-      type: 'image_url',
-      image_url: {
-        url: `data:${img.mimeType};base64,${img.data}`,
-      },
-    });
-  }
-
-  // 添加提示文本
-  const defaultPrompt = images.length === 1
-    ? '请详细描述这张图片的内容，包括主要元素、文字、颜色、布局等信息。'
-    : `请详细描述这 ${images.length} 张图片的内容，包括主要元素、文字、颜色、布局等信息。`;
-
-  content.push({
-    type: 'text',
-    text: userPrompt || defaultPrompt,
-  });
-
-  // 构建请求体
-  const requestBody = {
-    model: config.model,
-    messages: [
-      {
-        role: 'user',
-        content,
-      },
-    ],
-    max_tokens: 2000,
-  };
-
-  // 确定 API 端点
-  const baseUrl = config.baseUrl || 'https://api.openai.com/v1';
-  const endpoint = `${baseUrl.replace(/\/$/, '')}/chat/completions`;
-
-  // 发送请求
-  const response = await fetch(endpoint, {
-    method: 'POST',
-    headers: {
-      'Content-Type': 'application/json',
-      'Authorization': `Bearer ${config.apiKey}`,
-    },
-    body: JSON.stringify(requestBody),
-  });
-
-  if (!response.ok) {
-    const errorText = await response.text();
-    let errorMessage = `API 请求失败: ${response.status} ${response.statusText}`;
-    try {
-      const errorJson = JSON.parse(errorText);
-      if (errorJson.error?.message) {
-        errorMessage = errorJson.error.message;
-      }
-    } catch {
-      if (errorText) {
-        errorMessage += ` - ${errorText}`;
-      }
-    }
-    throw new Error(errorMessage);
-  }
-
-  const result = await response.json() as {
-    choices?: Array<{
-      message?: {
-        content?: string;
-      };
-    }>;
-  };
-
-  const text = result.choices?.[0]?.message?.content;
-  if (!text) {
-    throw new Error('API 返回了空响应');
-  }
-
-  return text;
-}
-
-/**
- * 检查 Vision 服务是否可用
- */
-export function isVisionAvailable(): boolean {
-  const config = loadVisionConfig();
-  return config !== null;
-}
-
-/**
- * 获取 Vision 配置信息（用于显示）
- */
-export function getVisionInfo(): { available: boolean; provider?: string; model?: string } {
-  const config = loadVisionConfig();
-  if (!config) {
-    return { available: false };
-  }
-  return {
-    available: true,
-    provider: config.provider,
-    model: config.model,
-  };
-}