feat: social-mcp 初始实现
多平台社交自动化 MCP 服务,首批支持小红书。 - 13 个 MCP 工具:登录管理、内容浏览、发布、互动 - 13 个 REST API 端点,支持 Bearer token 认证和限流 - BrowserManager:串行队列、背压、崩溃恢复 - Cookie 持久化:原子写入、0600 权限 - 安全:DNS rebinding 防御、错误脱敏、深层日志 redact - Docker 部署支持 - 28 个单元测试全部通过
This commit is contained in:
@@ -0,0 +1,14 @@
|
||||
# Server
|
||||
PORT=3000
|
||||
HOST=127.0.0.1
|
||||
|
||||
# Browser
|
||||
HEADLESS=true
|
||||
# BROWSER_BIN=/path/to/chromium # Optional: custom Chromium binary path
|
||||
|
||||
# Allow remote access (DANGEROUS - only set if you understand the risk)
|
||||
# ALLOW_REMOTE=yes-i-understand-the-risk
|
||||
|
||||
# Logging
|
||||
# NODE_ENV=production
|
||||
# LOG_LEVEL=info
|
||||
+28
@@ -0,0 +1,28 @@
|
||||
node_modules/
|
||||
dist/
|
||||
*.tsbuildinfo
|
||||
|
||||
# Environment
|
||||
.env
|
||||
.env.local
|
||||
.env.*.local
|
||||
|
||||
# Cookie data (sensitive)
|
||||
.social-mcp/
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# IDE
|
||||
.idea/
|
||||
.vscode/
|
||||
*.swp
|
||||
*.swo
|
||||
|
||||
# Logs
|
||||
*.log
|
||||
logs/
|
||||
|
||||
# Test coverage
|
||||
coverage/
|
||||
@@ -0,0 +1,68 @@
|
||||
# Social MCP
|
||||
|
||||
Multi-platform social media automation MCP service.
|
||||
|
||||
## Tech Stack
|
||||
|
||||
- TypeScript 5.x + Node.js 22 LTS
|
||||
- rebrowser-playwright (anti-detection Playwright fork)
|
||||
- @modelcontextprotocol/sdk ^1.27
|
||||
- Express ^4
|
||||
- pino ^9 (structured logging with redact)
|
||||
- zod ^3.25 (NOT v4 — incompatible with MCP SDK v1.x)
|
||||
|
||||
## Project Structure
|
||||
|
||||
```
|
||||
src/
|
||||
├── index.ts # Entry point + graceful shutdown
|
||||
├── server/
|
||||
│ ├── app.ts # Express + MCP server
|
||||
│ └── middleware.ts # DNS rebinding guard, error handling
|
||||
├── browser/
|
||||
│ └── manager.ts # BrowserManager (serial queue, timeouts, crash recovery)
|
||||
├── cookie/
|
||||
│ └── store.ts # CookieStore (per-platform, 0600 permissions)
|
||||
├── config/
|
||||
│ └── index.ts # Environment config
|
||||
├── utils/
|
||||
│ ├── logger.ts # pino logger with deep redact
|
||||
│ ├── errors.ts # Error classification + sanitization
|
||||
│ └── downloader.ts # Media download + path validation
|
||||
└── platforms/
|
||||
└── xiaohongshu/ # First platform plugin
|
||||
├── index.ts # PlatformPlugin registration
|
||||
├── actions.ts # Business logic (shared by MCP + REST)
|
||||
├── selectors.ts # CSS selectors
|
||||
├── schemas.ts # Zod schemas for MCP tools
|
||||
├── types.ts # Domain types
|
||||
└── *.ts # Feature modules (login, search, etc.)
|
||||
```
|
||||
|
||||
## Key Commands
|
||||
|
||||
```bash
|
||||
pnpm build # Build with tsup
|
||||
pnpm dev # Watch mode build
|
||||
pnpm start # Run built server
|
||||
pnpm test # Run vitest
|
||||
pnpm lint # TypeScript type check
|
||||
```
|
||||
|
||||
## Architecture Rules
|
||||
|
||||
- Each platform is a PlatformPlugin — shared infra, independent business logic
|
||||
- actions.ts is the single source of business logic (MCP + REST both call it)
|
||||
- CSS selectors go in selectors.ts — never hardcode in business logic
|
||||
- All MCP tools use withErrorHandling wrapper with error classification
|
||||
- BrowserManager serializes per-platform operations (withPage for normal ops, acquirePage for login)
|
||||
- zod ^3.25 only — v4 breaks MCP SDK compatibility
|
||||
- Cookie files use 0600 permissions with atomic write (tmp + rename)
|
||||
- Log redaction uses ** deep glob patterns
|
||||
- Default listen on 127.0.0.1 with Host header validation
|
||||
|
||||
## Testing
|
||||
|
||||
- vitest for unit tests (pure logic only)
|
||||
- Don't mock Playwright Page — test real browser interactions with MCP Inspector
|
||||
- Test: BrowserManager queue, CookieStore, error classification, zod schemas, data parsing
|
||||
+89
@@ -0,0 +1,89 @@
|
||||
# =============================================================================
|
||||
# Stage 1: Builder
|
||||
# =============================================================================
|
||||
|
||||
FROM node:22-slim AS builder
|
||||
|
||||
# Enable corepack for pnpm
|
||||
RUN corepack enable && corepack prepare pnpm@latest --activate
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy package manifests first (layer caching for dependency install)
|
||||
COPY package.json pnpm-lock.yaml ./
|
||||
|
||||
# Install all dependencies (including devDependencies for building)
|
||||
RUN pnpm install --frozen-lockfile
|
||||
|
||||
# Copy source code
|
||||
COPY tsconfig.json tsup.config.ts ./
|
||||
COPY src/ src/
|
||||
|
||||
# Build the project
|
||||
RUN pnpm build
|
||||
|
||||
# Remove devDependencies to slim down node_modules for production
|
||||
RUN pnpm prune --prod
|
||||
|
||||
# =============================================================================
|
||||
# Stage 2: Production
|
||||
# =============================================================================
|
||||
|
||||
FROM node:22-slim
|
||||
|
||||
# Install Chromium dependencies required by Playwright/rebrowser-playwright
|
||||
# These are the shared libraries Chromium needs to run in headless mode.
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libnss3 \
|
||||
libnspr4 \
|
||||
libatk1.0-0 \
|
||||
libatk-bridge2.0-0 \
|
||||
libcups2 \
|
||||
libdrm2 \
|
||||
libdbus-1-3 \
|
||||
libxkbcommon0 \
|
||||
libxcomposite1 \
|
||||
libxdamage1 \
|
||||
libxfixes3 \
|
||||
libxrandr2 \
|
||||
libgbm1 \
|
||||
libpango-1.0-0 \
|
||||
libcairo2 \
|
||||
libasound2 \
|
||||
libatspi2.0-0 \
|
||||
libwayland-client0 \
|
||||
fonts-noto-cjk \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Create non-root user
|
||||
RUN groupadd --gid 1001 appuser \
|
||||
&& useradd --uid 1001 --gid appuser --shell /bin/sh --create-home appuser
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy built artifacts and production dependencies from builder
|
||||
COPY --from=builder --chown=appuser:appuser /app/dist ./dist
|
||||
COPY --from=builder --chown=appuser:appuser /app/node_modules ./node_modules
|
||||
COPY --from=builder --chown=appuser:appuser /app/package.json ./package.json
|
||||
|
||||
# Create data directory for cookies and API token
|
||||
RUN mkdir -p /home/appuser/.social-mcp \
|
||||
&& chown -R appuser:appuser /home/appuser/.social-mcp
|
||||
|
||||
# Switch to non-root user
|
||||
USER appuser
|
||||
|
||||
# Environment defaults
|
||||
ENV NODE_ENV=production \
|
||||
HOST=0.0.0.0 \
|
||||
PORT=3000 \
|
||||
HEADLESS=true \
|
||||
COOKIE_DIR=/home/appuser/.social-mcp \
|
||||
ALLOW_REMOTE=yes-i-understand-the-risk
|
||||
|
||||
EXPOSE 3000
|
||||
|
||||
HEALTHCHECK --interval=30s --timeout=5s --retries=3 --start-period=10s \
|
||||
CMD node -e "fetch('http://localhost:3000/health').then(r => process.exit(r.ok ? 0 : 1)).catch(() => process.exit(1))"
|
||||
|
||||
CMD ["node", "dist/index.js"]
|
||||
@@ -0,0 +1,911 @@
|
||||
# Social Auto Hub — 多平台社交自动化 MCP 服务
|
||||
|
||||
## 一、项目定位
|
||||
|
||||
通过浏览器自动化,让 AI 助手(Claude 等)能操控多个社交平台。
|
||||
首批支持:**小红书**。后续按需扩展小黑盒、B站、微博等。
|
||||
|
||||
---
|
||||
|
||||
## 二、技术栈选型
|
||||
|
||||
| 组件 | 选择 | 理由 |
|
||||
|------|------|------|
|
||||
| 语言 | **TypeScript 5.x** | 浏览器自动化生态最强,MCP SDK 是官方参考实现 |
|
||||
| 运行时 | **Node.js 22 LTS** | 当前 LTS,Node 20 即将进入 maintenance |
|
||||
| 浏览器自动化 | **rebrowser-playwright** | Playwright 的 drop-in 替代,内置反自动化检测补丁,活跃维护 |
|
||||
| MCP SDK | **@modelcontextprotocol/sdk ^1.27** | 官方 TypeScript 参考实现,锁定 v1.x 稳定版 |
|
||||
| HTTP 服务 | **Express ^4** | MCP SDK 原生支持 Express 集成 |
|
||||
| 包管理 | **pnpm** | 快,磁盘占用小 |
|
||||
| 构建 | **tsup** | 零配置 TS 打包,基于 esbuild |
|
||||
| 日志 | **pino ^9** | 高性能结构化日志,内置 redact 脱敏 |
|
||||
| 校验 | **zod ^3.25** | 运行时类型校验,MCP SDK 原生支持。**不用 v4**(与 SDK 有兼容问题) |
|
||||
| 容器化 | **Docker** | 含 Playwright 浏览器 |
|
||||
|
||||
### 为什么用 rebrowser-playwright 而不是 playwright-extra?
|
||||
|
||||
`playwright-extra` 最后发布 4.3.6 是 3 年前,已停止维护,随时会与新版 Playwright 不兼容。
|
||||
`rebrowser-playwright` 是 Playwright 的 patched fork(2026.2 仍在更新),直接 `npm install rebrowser-playwright` 即可,
|
||||
代码中 `import { chromium } from 'rebrowser-playwright'` 替代原 `playwright`,零改动。
|
||||
它在底层修复 CDP 泄漏等检测点,比 JS 层面的 stealth 脚本更彻底。
|
||||
|
||||
### 为什么锁定 zod ^3.25?
|
||||
|
||||
MCP SDK v1.x 的 `server.tool()` 内部依赖 zod v3 的 schema 结构。
|
||||
zod v4 有 breaking change(`.describe()` 丢失、`_parse` 不兼容),
|
||||
参见 [Issue #925](https://github.com/modelcontextprotocol/typescript-sdk/issues/925)。
|
||||
|
||||
---
|
||||
|
||||
## 三、架构设计:插件式多平台
|
||||
|
||||
### 3.1 设计原则
|
||||
|
||||
**不做统一 Platform 接口。** 各平台业务差异大(小红书的笔记 vs B站的弹幕 vs 微博的转发),强行抽象只会产生大量 `NotSupported`。
|
||||
|
||||
真正共享的是**基础设施**,业务逻辑各平台独立:
|
||||
|
||||
```
|
||||
共享基础设施 各平台独立
|
||||
────────── ──────────
|
||||
BrowserManager MCP 工具定义 + zod schema
|
||||
CookieStore actions(业务逻辑层)
|
||||
Config / Logger 页面操作逻辑
|
||||
MCP Server 框架 CSS 选择器
|
||||
Express + 中间件 业务类型定义
|
||||
错误处理包装 REST API handler
|
||||
```
|
||||
|
||||
### 3.2 平台插件契约
|
||||
|
||||
每个平台导出一个 `PlatformPlugin`,把自己的 MCP 工具注册进去:
|
||||
|
||||
```typescript
|
||||
/**
 * Contract each platform exports so it can plug its own MCP tools (and,
 * optionally, REST routes) into the shared server infrastructure.
 */
export interface PlatformPlugin {
|
||||
/** Platform name. */
name: string;
|
||||
/** Registers this platform's MCP tools on the given server. */
registerTools(server: McpServer, browser: BrowserManager): void;
|
||||
/** Optional: registers this platform's routes on the given Express router. */
registerRoutes?(router: express.Router, browser: BrowserManager): void;
|
||||
// Lifecycle hooks (optional for now; whether to make them mandatory is to be
// re-evaluated when a second platform is added).
|
||||
init?(): Promise<void>;
|
||||
shutdown?(): Promise<void>;
|
||||
healthCheck?(): Promise<{ healthy: boolean; message?: string }>;
|
||||
}
|
||||
```
|
||||
|
||||
新增平台只需要:
|
||||
1. 在 `src/platforms/` 下新建目录
|
||||
2. 实现 `PlatformPlugin`
|
||||
3. 在 `src/index.ts` 中 import 并注册
|
||||
|
||||
> **Review 备注**:`init/shutdown/healthCheck` 当前为可选钩子。
|
||||
> Phase 1 只有小红书一个平台,暂不强制。等第二个平台接入时评估是否改为必选。
|
||||
|
||||
### 3.3 平台内部分层
|
||||
|
||||
每个平台内部统一采用 **actions 层** 分离业务逻辑和 handler:
|
||||
|
||||
```
|
||||
platforms/xiaohongshu/
|
||||
├── index.ts # PlatformPlugin: 注册 MCP 工具,调用 actions
|
||||
├── actions.ts # 业务逻辑层(不依赖 MCP/REST 的普通函数,接收 Page 返回数据;会操作页面,并非严格意义上的纯函数)
|
||||
├── selectors.ts # CSS 选择器常量
|
||||
├── schemas.ts # zod schema(MCP 工具参数)
|
||||
├── types.ts # 业务类型
|
||||
├── login.ts # 登录操作(页面交互)
|
||||
├── search.ts # 搜索操作
|
||||
├── ... # 其他操作
|
||||
```
|
||||
|
||||
**actions.ts** 是核心:MCP handler 和 REST handler 都调用 actions 中的函数,不重复业务逻辑。
|
||||
|
||||
```typescript
|
||||
// actions.ts — 纯业务逻辑,不关心 MCP/REST
|
||||
export async function searchFeeds(page: Page, keyword: string, filters?: FilterOption): Promise<Feed[]> { ... }
|
||||
export async function checkLoginStatus(page: Page): Promise<LoginStatus> { ... }
|
||||
|
||||
// index.ts — MCP handler 只做参数解析 + 调用 action + 格式化输出
|
||||
server.tool('xhs_search', '搜索小红书笔记', SearchSchema, async (args) => {
|
||||
return browser.withPage('xiaohongshu', async (page) => {
|
||||
const feeds = await searchFeeds(page, args.keyword, args.filters);
|
||||
return { content: [{ type: 'text', text: JSON.stringify(feeds) }] };
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 四、项目结构
|
||||
|
||||
```
|
||||
social-auto-hub/
|
||||
├── package.json
|
||||
├── tsconfig.json
|
||||
├── tsup.config.ts
|
||||
├── .env.example
|
||||
├── .gitignore
|
||||
├── CLAUDE.md
|
||||
├── Dockerfile
|
||||
│
|
||||
├── src/
|
||||
│ ├── index.ts # 入口:配置、注册插件、启动、优雅关闭(drain→browser→http→log)
|
||||
│ │
|
||||
│ ├── server/
|
||||
│ │ ├── app.ts # AppServer: Express + MCP 生命周期
|
||||
│ │ └── middleware.ts # DNS rebinding 防御 + 错误处理 + 优雅关闭 503(不加 CORS)
|
||||
│ │
|
||||
│ ├── browser/
|
||||
│ │ └── manager.ts # BrowserManager: 浏览器 + Context + 串行队列 + 背压 + 启动锁
|
||||
│ │
|
||||
│ ├── cookie/
|
||||
│ │ └── store.ts # CookieStore: 按平台隔离,文件权限 0600,原子写入
|
||||
│ │
|
||||
│ ├── config/
|
||||
│ │ └── index.ts # 全局配置(环境变量,默认 127.0.0.1)
|
||||
│ │
|
||||
│ ├── utils/
|
||||
│ │ ├── logger.ts # pino 日志(深层 redact 脱敏 + 自定义错误序列化)
|
||||
│ │ ├── errors.ts # 错误分类 + 消息脱敏 + withErrorHandling 包装
|
||||
│ │ └── downloader.ts # 图片下载 + 媒体路径校验
|
||||
│ │
|
||||
│ └── platforms/
|
||||
│ └── xiaohongshu/
|
||||
│ ├── index.ts # PlatformPlugin 注册
|
||||
│ ├── actions.ts # 业务逻辑层(MCP/REST 共享)
|
||||
│ ├── selectors.ts # CSS 选择器常量
|
||||
│ ├── types.ts # 小红书业务类型
|
||||
│ ├── schemas.ts # MCP 工具参数 zod schema
|
||||
│ ├── login.ts # 登录(特殊 Page 生命周期)
|
||||
│ ├── search.ts # 搜索
|
||||
│ ├── feeds.ts # Feed 列表
|
||||
│ ├── feed-detail.ts # 笔记详情 + 评论
|
||||
│ ├── publish.ts # 图文发布
|
||||
│ ├── publish-video.ts # 视频发布
|
||||
│ ├── comment.ts # 评论 + 回复
|
||||
│ ├── interaction.ts # 点赞 / 收藏
|
||||
│ └── user-profile.ts # 用户主页
|
||||
│
|
||||
└── deploy/
|
||||
└── docker-compose.yml
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 五、小红书 MCP 工具清单(13 个)
|
||||
|
||||
### 5.1 登录管理(3 个)
|
||||
|
||||
| MCP 工具名 | 说明 | 读/写 | 参数 |
|
||||
|-----------|------|-------|------|
|
||||
| `xhs_check_login` | 检查登录状态 | 只读 | 无 |
|
||||
| `xhs_get_login_qrcode` | 获取登录二维码图片 | 只读 | 无 |
|
||||
| `xhs_delete_cookies` | 删除 Cookie,重置登录 | 写(破坏性) | 无 |
|
||||
|
||||
### 5.2 内容浏览(4 个)
|
||||
|
||||
| MCP 工具名 | 说明 | 读/写 | 参数 |
|
||||
|-----------|------|-------|------|
|
||||
| `xhs_list_feeds` | 获取首页推荐 Feed 列表 | 只读 | 无 |
|
||||
| `xhs_search` | 搜索笔记 | 只读 | `keyword`, `filters?`(排序/类型/时间/范围/位置) |
|
||||
| `xhs_get_feed_detail` | 获取笔记详情 + 评论 | 只读 | `feed_id`, `xsec_token`, `load_all_comments?` |
|
||||
| `xhs_get_user_profile` | 获取用户主页信息 | 只读 | `user_id`, `xsec_token` |
|
||||
|
||||
> **注意**:`xhs_get_feed_detail` 相比参考项目精简了参数。
|
||||
> `scroll_speed`、`click_more_replies`、`reply_limit` 是浏览器实现细节,
|
||||
> 不暴露给 AI,改为服务端内部配置(在 config 或 actions 层处理)。
|
||||
|
||||
### 5.3 内容发布(2 个)
|
||||
|
||||
| MCP 工具名 | 说明 | 读/写 | 参数 |
|
||||
|-----------|------|-------|------|
|
||||
| `xhs_publish_image` | 发布图文笔记 | 写 | `title`, `content`, `images[]`, `tags?[]`, `schedule_at?`, `is_original?`, `visibility?` |
|
||||
| `xhs_publish_video` | 发布视频笔记 | 写 | `title`, `content`, `video`, `tags?[]`, `schedule_at?`, `visibility?` |
|
||||
|
||||
### 5.4 互动操作(4 个)
|
||||
|
||||
| MCP 工具名 | 说明 | 读/写 | 参数 |
|
||||
|-----------|------|-------|------|
|
||||
| `xhs_post_comment` | 发表评论 | 写 | `feed_id`, `xsec_token`, `content` |
|
||||
| `xhs_reply_comment` | 回复评论 | 写 | `feed_id`, `xsec_token`, `comment_id?`, `user_id?`, `content` |
|
||||
| `xhs_like` | 点赞/取消点赞 | 写 | `feed_id`, `xsec_token`, `unlike?` |
|
||||
| `xhs_favorite` | 收藏/取消收藏 | 写 | `feed_id`, `xsec_token`, `unfavorite?` |
|
||||
|
||||
### 5.5 REST API(Phase 5,可选)
|
||||
|
||||
```
|
||||
GET /api/xhs/login/status POST /api/xhs/publish/image
|
||||
GET /api/xhs/login/qrcode POST /api/xhs/publish/video
|
||||
DELETE /api/xhs/login/cookies POST /api/xhs/comment
|
||||
GET /api/xhs/feeds POST /api/xhs/comment/reply
|
||||
POST /api/xhs/search POST /api/xhs/like
|
||||
POST /api/xhs/feeds/detail POST /api/xhs/favorite
|
||||
POST /api/xhs/user/profile
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 六、核心模块设计
|
||||
|
||||
### 6.1 BrowserManager
|
||||
|
||||
```typescript
|
||||
import { Browser, BrowserContext, Page } from 'rebrowser-playwright';
|
||||
|
||||
/**
 * Owns the shared browser process plus one BrowserContext per platform, and
 * serializes page operations per platform.
 *
 * - `withPage` runs a callback on a fresh page, one call at a time per
 *   platform, with a bounded queue and a hard timeout.
 * - `acquirePage` hands out a page whose lifetime the caller controls (used by
 *   the QR-code login flow); it does not go through the serial queue.
 * - `ensureBrowser` launches the browser lazily and relaunches it after a
 *   disconnect.
 */
class BrowserManager {
  private browser: Browser | null = null;
  private contexts = new Map<string, BrowserContext>();
  /** Tail of each platform's serial queue; these promises never reject. */
  private queues = new Map<string, Promise<void>>();
  /** Number of queued-or-running `withPage` calls per platform. */
  private queueDepths = new Map<string, number>();
  /** In-flight launch, shared by concurrent `ensureBrowser()` callers. */
  private launchPromise: Promise<Browser> | null = null;

  private readonly MAX_QUEUE_DEPTH = 10;

  /** Tiered timeouts (ms): callers pick the entry matching their operation type. */
  static readonly OPERATION_TIMEOUTS: Record<string, number> = {
    like: 15_000, // 15s - quick interaction
    comment: 20_000, // 20s
    feed_list: 30_000, // 30s - page load + extraction
    search: 30_000, // 30s
    feed_detail: 60_000, // 60s - includes scroll loading
    publish: 300_000, // 5min - uploads can be slow
    login: 300_000, // 5min - waits on the user
    default: 60_000, // 1min - fallback
  };

  /**
   * Runs `fn` on a fresh page of `platform`'s context, after every previously
   * queued operation for that platform has settled.
   *
   * @param platform  Platform key; selects the queue and the browser context.
   * @param fn        Operation to run. The page is closed when it settles or times out.
   * @param timeoutMs Overall limit; defaults to `OPERATION_TIMEOUTS.default`.
   * @returns Whatever `fn` resolves to.
   * @throws Error immediately when the platform queue already holds
   *         `MAX_QUEUE_DEPTH` operations, Error when the timeout elapses, or
   *         whatever `fn` / browser start-up rejects with.
   */
  async withPage<T>(
    platform: string,
    fn: (page: Page) => Promise<T>,
    timeoutMs?: number,
  ): Promise<T> {
    // Backpressure: refuse new work instead of queueing without bound.
    const depth = this.queueDepths.get(platform) ?? 0;
    if (depth >= this.MAX_QUEUE_DEPTH) {
      throw new Error(`平台 ${platform} 队列已满 (${this.MAX_QUEUE_DEPTH}),请稍后重试`);
    }
    this.queueDepths.set(platform, depth + 1);

    const effectiveTimeout = timeoutMs ?? BrowserManager.OPERATION_TIMEOUTS.default;

    // Serialize: chain onto the previous operation of the same platform.
    const prev = this.queues.get(platform) ?? Promise.resolve();
    const task = prev.then(async () => {
      await this.ensureBrowser();
      const ctx = await this.getContext(platform);
      const page = await ctx.newPage();
      // Apply the same limit at the Playwright level so low-level waits are bounded too.
      page.setDefaultTimeout(effectiveTimeout);
      page.setDefaultNavigationTimeout(effectiveTimeout);

      let timer: ReturnType<typeof setTimeout> | undefined;
      try {
        return await Promise.race([
          fn(page),
          new Promise<never>((_, reject) => {
            timer = setTimeout(
              () => reject(new Error(`操作超时: ${effectiveTimeout}ms`)),
              effectiveTimeout,
            );
          }),
        ]);
      } finally {
        // Clear the timer on every exit path (success, failure, timeout) so a
        // failed operation does not leave a pending timer behind.
        clearTimeout(timer);
        // After a timeout `fn` may still be running; closing the page interrupts it.
        await page.close().catch(() => {});
      }
    });

    // Restore the depth counter once the task settles. `then(f, f)` never
    // rejects, so the resulting promise is safe both as the queue tail and as a
    // promise nobody awaits (a bare `task.finally(...)` would re-reject and
    // surface as an unhandled rejection whenever the task fails).
    const restoreDepth = (): void => {
      const current = this.queueDepths.get(platform) ?? 1;
      this.queueDepths.set(platform, Math.max(0, current - 1));
    };
    this.queues.set(platform, task.then(restoreDepth, restoreDepth));

    return task;
  }

  /**
   * Login-only: returns a page that is NOT closed automatically; the caller
   * owns its lifetime and must call `release`.
   *
   * `release` is idempotent, and a 5-minute safety timer releases the page if
   * the caller never does.
   */
  async acquirePage(platform: string): Promise<{ page: Page; release: () => Promise<void> }> {
    await this.ensureBrowser();
    const ctx = await this.getContext(platform);
    const page = await ctx.newPage();

    let released = false;
    const release = async () => {
      if (released) return; // idempotent
      released = true;
      await page.close().catch(() => {});
    };

    // Safety net: force a release after 5 minutes in case the caller forgets.
    const maxLifetime = setTimeout(() => {
      logger.warn({ platform }, '页面超过最大生命周期,强制释放');
      void release();
    }, 5 * 60 * 1000);

    return {
      page,
      release: async () => {
        clearTimeout(maxLifetime);
        await release();
      },
    };
  }

  /**
   * Returns a connected browser, launching (or relaunching) one when needed.
   * `launchPromise` acts as a launch lock so concurrent callers share a single
   * launch instead of starting several browser processes.
   */
  private async ensureBrowser(): Promise<Browser> {
    if (this.browser?.isConnected()) return this.browser;

    if (!this.launchPromise) {
      this.launchPromise = (async () => {
        // A browser object exists but is no longer connected: drop stale state first.
        if (this.browser) {
          logger.warn('浏览器连接断开,正在重新启动');
          this.contexts.clear();
          this.browser = null;
        }
        const browser = await chromium.launch({ headless: config.headless });
        browser.on('disconnected', () => {
          logger.error('浏览器进程意外断开');
          // A late event from an instance that was already replaced must not
          // wipe the state belonging to its successor.
          if (this.browser !== browser) return;
          this.browser = null;
          this.contexts.clear();
        });
        this.browser = browser;
        return browser;
      })().finally(() => {
        this.launchPromise = null;
      });
    }

    return this.launchPromise;
  }

  private async getContext(platform: string): Promise<BrowserContext>;
  async saveCookies(platform: string): Promise<void>;

  /** Waits until every platform queue has settled (used for graceful shutdown). */
  async drain(): Promise<void> {
    await Promise.allSettled(Array.from(this.queues.values()));
  }

  async close(): Promise<void>;
}
|
||||
```
|
||||
|
||||
关键设计点:
|
||||
- **per-platform 串行队列**:同一平台操作排队,不同平台可并行
|
||||
- **队列背压**:`MAX_QUEUE_DEPTH = 10`,超出直接拒绝,防止无限排队
|
||||
- **分级超时**:按操作类型设置不同超时(like 15s vs publish 5min),避免快操作被慢操作的默认超时卡住
|
||||
- **超时 timer 清理**:`Promise.race` 成功后 `clearTimeout`,避免 timer 泄漏和 `unhandledRejection`
|
||||
- **Playwright 级别超时**:`page.setDefaultTimeout()` 确保底层操作也受控,超时时 `page.close()` 会中断正在运行的 fn
|
||||
- **启动锁**:`launchPromise` 防止并发调用 `ensureBrowser()` 时启动多个浏览器进程
|
||||
- **崩溃恢复**:`browser.on('disconnected')` 主动清理状态,下次调用自动重建
|
||||
- **登录特殊处理**:`acquirePage()` 返回幂等的 `release` 函数 + 5 分钟安全网超时
|
||||
- **优雅关闭**:`drain()` 等待所有队列排空后再关闭浏览器
|
||||
|
||||
### 6.2 登录流程(特殊 Page 生命周期)
|
||||
|
||||
```typescript
|
||||
// login.ts — 扫码登录不能用 withPage(需要页面保持打开等用户扫码)
|
||||
/**
 * Opens the explore page on a caller-owned page, reads the login QR code and
 * starts a background wait for the scan to complete.
 *
 * The page is released here only on failure; on success the background
 * `waitForLoginAndRelease` call owns the release.
 *
 * @param browser Manager used to acquire the page and later save cookies.
 * @returns The QR code image source plus the scan time limit.
 * @throws Whatever navigation/lookup rejects with, or Error when the QR image
 *         element has no `src` value.
 */
export async function getLoginQRCode(browser: BrowserManager): Promise<QRCodeResult> {
  const { page, release } = await browser.acquirePage('xiaohongshu');

  try {
    await page.goto('https://www.xiaohongshu.com/explore');
    // ... check whether already logged in (elided in this design sketch) ...
    const qrcodeData = await page.getAttribute('.login-container .qrcode-img', 'src');
    if (!qrcodeData) {
      // Without this guard a missing `src` would be returned as a "successful"
      // QR code while the page stayed held by the background wait.
      throw new Error('找不到元素: 二维码图片');
    }

    // Wait for the scan in the background, then save cookies and release the
    // page. Fire-and-forget: the rejection handler keeps errors from escaping.
    void waitForLoginAndRelease(page, browser, release).catch((err: unknown) => {
      logger.error({ err }, '登录等待流程异常');
    });

    return { qrcodeData, alreadyLoggedIn: false, timeout: '4m' };
  } catch (err) {
    await release();
    throw err;
  }
}
|
||||
|
||||
/**
 * Waits up to 4 minutes for the logged-in marker to appear, saves the cookies,
 * and always releases the page afterwards.
 *
 * `release` is expected to be idempotent, so the timer and the `finally`
 * block may both call it.
 *
 * @param page    Page showing the QR code.
 * @param browser Manager used to persist cookies after a successful login.
 * @param release Idempotent release callback for `page`.
 */
async function waitForLoginAndRelease(page: Page, browser: BrowserManager, release: () => Promise<void>) {
  const timeout = setTimeout(() => void release(), 4 * 60 * 1000);
  try {
    await page.waitForSelector('.user .link-wrapper .channel', { timeout: 4 * 60 * 1000 });
    await browser.saveCookies('xiaohongshu');
  } catch (err) {
    // Still best-effort (nothing is rethrown), but no longer silent: a scan
    // that never happened and a failed cookie save both end up here.
    logger.warn({ err }, '登录等待结束,未保存 Cookie');
  } finally {
    clearTimeout(timeout);
    await release();
  }
}
|
||||
```
|
||||
|
||||
> **Review 修正**:原设计中 `release()` 没有幂等保护,`setTimeout` 和 `finally` 可能双重触发。
|
||||
> 现在 `acquirePage` 内部保证 `release` 幂等(`released` 标志位),彻底消除该竞态。
|
||||
> 同时 `waitForLoginAndRelease` 的 fire-and-forget 调用必须 `.catch()` 防止异常逃逸。
|
||||
|
||||
### 6.3 CookieStore
|
||||
|
||||
```typescript
|
||||
/**
 * Persists each platform's Playwright storage state on disk.
 *
 * Layout and permissions, as stated by the design:
 * - one location per platform under the cookie root
 *   (NOTE(review): this sketch originally said `~/.social-auto-hub/<platform>/`,
 *   while the Dockerfile and .gitignore in the same commit use `.social-mcp`;
 *   confirm which one is current)
 * - file mode 0o600 (owner read/write only)
 * - directory mode 0o700
 */
class CookieStore {
  getPath(platform: string): string;
  async load(platform: string): Promise<StorageState | null>;

  /**
   * Atomic write: the state goes to a temp file first and is then renamed over
   * the target, so a crash mid-write cannot leave a truncated cookie file.
   *
   * @param platform Platform key; selects the target file via `getPath`.
   * @param state    Storage state to serialize as JSON.
   * @throws Whatever the write or rename rejects with; the temp file is
   *         removed (best effort) before the error is rethrown.
   */
  async save(platform: string, state: StorageState): Promise<void> {
    const filePath = this.getPath(platform);
    const tmpPath = `${filePath}.tmp.${process.pid}`;
    try {
      await fs.writeFile(tmpPath, JSON.stringify(state), { mode: 0o600 });
      await fs.rename(tmpPath, filePath);
    } catch (err) {
      // Do not leave a stray temp file (which holds cookie data) behind.
      await fs.unlink(tmpPath).catch(() => {});
      throw err;
    }
  }

  async delete(platform: string): Promise<void>;
}
|
||||
```
|
||||
|
||||
> **Review 补充**:Cookie 加密(AES-256-GCM)暂不实现。
|
||||
> 当前安全模型:文件权限 0600 + 监听 127.0.0.1,对本地自动化工具足够。
|
||||
> 如果未来需要更高安全级别(多用户共享机器),再引入加密 + OS keychain 集成。
|
||||
|
||||
### 6.4 统一错误处理 + 错误分类
|
||||
|
||||
浏览器自动化的错误类型多样,AI 助手需要根据错误类型决定下一步操作(重试?重新登录?报告失败?)。
|
||||
因此引入**错误分类体系**,让 MCP 响应携带结构化的错误信息。
|
||||
|
||||
```typescript
|
||||
// utils/errors.ts
|
||||
|
||||
// 错误分类枚举
|
||||
/** Coarse error categories reported to the MCP client so it can pick a next step. */
enum ErrorCategory {
  TIMEOUT = 'TIMEOUT', // operation timed out
  AUTH_REQUIRED = 'AUTH_REQUIRED', // login required
  SELECTOR_NOT_FOUND = 'SELECTOR_NOT_FOUND', // selector not found (platform UI may have changed)
  NETWORK = 'NETWORK', // network error
  PLATFORM_ERROR = 'PLATFORM_ERROR', // platform returned an error
  INTERNAL = 'INTERNAL', // internal error
}

/**
 * Maps an error to an ErrorCategory by inspecting its name and message.
 *
 * Checks run in order and the first match wins, so a message that mentions
 * both a timeout and a selector is reported as TIMEOUT.
 *
 * @param err Error to classify.
 * @returns The matching category, or INTERNAL when nothing matches.
 */
function classifyError(err: Error): ErrorCategory {
  const msg = err.message.toLowerCase();
  // '超时' covers timeout errors whose message is written in Chinese, e.g.
  // `操作超时: 60000ms`, which contains neither "timeout" nor a TimeoutError name.
  if (msg.includes('timeout') || msg.includes('超时') || err.name === 'TimeoutError') {
    return ErrorCategory.TIMEOUT;
  }
  if (msg.includes('net::err_')) return ErrorCategory.NETWORK;
  if (msg.includes('login') || msg.includes('登录')) return ErrorCategory.AUTH_REQUIRED;
  if (msg.includes('waiting for selector') || msg.includes('找不到元素')) {
    return ErrorCategory.SELECTOR_NOT_FOUND;
  }
  return ErrorCategory.INTERNAL;
}
|
||||
|
||||
// 错误消息脱敏:去掉文件路径、长 token 等内部信息
|
||||
/**
 * Strips internal details from an error message before it is returned to the
 * client: URLs, slash-delimited paths and long hex strings are replaced by
 * placeholders, and the result is capped at 200 characters.
 *
 * URLs are replaced first. The path rule also matches the `//host/...` part of
 * a URL, so running it first turns `https://host/x` into `https:[path]` and
 * the URL rule never gets a chance to match.
 *
 * @param message Raw error message.
 * @returns The sanitized message, at most 200 characters long.
 */
function sanitizeErrorMessage(message: string): string {
  return message
    .replace(/https?:\/\/[^\s]+/g, '[url]') // URLs (may carry tokens)
    .replace(/\/[^\s:]+/g, '[path]') // slash-delimited filesystem paths
    .replace(/[a-f0-9]{32,}/gi, '[hash]') // long hex strings
    .substring(0, 200);
}
|
||||
|
||||
// 统一错误包装(注意:必须是 async 函数)
|
||||
/**
 * Runs an MCP tool handler and converts any thrown value into an
 * `isError: true` tool result instead of letting it propagate.
 *
 * The failure is classified, its message sanitized, and the original error is
 * logged together with the tool name and category.
 *
 * @param toolName Name of the tool, echoed in the log entry and the error payload.
 * @param fn       Handler to execute.
 * @returns The handler's result, or an error result whose text is a JSON
 *          object with `tool`, `error` (category) and `message`.
 */
export async function withErrorHandling(
  toolName: string,
  fn: () => Promise<McpToolResult>,
): Promise<McpToolResult> {
  try {
    return await fn();
  } catch (thrown) {
    // Normalize non-Error throwables so the rest of the path can rely on `.message`.
    const failure = thrown instanceof Error ? thrown : new Error(String(thrown));
    const category = classifyError(failure);
    const message = sanitizeErrorMessage(failure.message);

    logger.error({ tool: toolName, category, err: failure }, '工具执行失败');

    const payload = JSON.stringify({ tool: toolName, error: category, message });
    return {
      content: [{ type: 'text', text: payload }],
      isError: true,
    };
  }
}
|
||||
```
|
||||
|
||||
每个 MCP 工具 handler 用 `withErrorHandling` 包裹,Playwright 的 `TimeoutError`、
|
||||
`TargetClosedError` 等异常统一转为 MCP 的 `isError: true` 响应,不会导致连接断开。
|
||||
AI 助手可以根据 `error` 字段判断是否需要重试、重新登录、或报告错误。
|
||||
|
||||
> **Review 修正**:原设计中 `withErrorHandling` 不是 `async` 却用了 `await`(编译不通过),
|
||||
> 且返回类型 `T` 和 `McpToolResult` 不匹配。已修正。
|
||||
> 新增错误分类 + 消息脱敏,防止泄露文件路径等内部信息。
|
||||
|
||||
### 6.5 日志脱敏
|
||||
|
||||
```typescript
|
||||
// utils/logger.ts
|
||||
import pino from 'pino';
|
||||
|
||||
// Redaction paths handed to pino: sensitive field names, plus the cookie and
// localStorage values found inside a Playwright StorageState.
// NOTE(review): pino's documented redact path syntax lists the single-level
// `*` wildcard; confirm that the `**` prefix and the unbracketed `set-cookie`
// key are accepted by the pino version in use before relying on them.
const REDACT_PATHS = [
  '**.cookie',
  '**.cookies',
  '**.set-cookie',
  '**.authorization',
  '**.password',
  '**.secret',
  '**.token',
  '**.xsec_token',
  '**.access_token',
  '**.refresh_token',
  '**.api_key',
  '**.apikey',
  '**.sessionid',
  '**.session_id',
  '**.cookies[*].value', // cookie values in a Playwright StorageState
  '**.origins[*].localStorage[*].value', // localStorage values in a Playwright StorageState
];

/** Serializes an Error for logging; the stack is added only when NODE_ENV is 'development'. */
const serializeError = (err: Error) => {
  const summary = { type: err.constructor.name, message: err.message };
  return process.env.NODE_ENV === 'development' ? { ...summary, stack: err.stack } : summary;
};

const isProduction = process.env.NODE_ENV === 'production';

export const logger = pino({
  redact: { paths: REDACT_PATHS, censor: '[REDACTED]' },
  serializers: { err: serializeError },
  // Pretty-printed output everywhere except production.
  transport: isProduction ? undefined : { target: 'pino-pretty' },
});

// In production, drop DEBUG so Playwright debug output (which bypasses pino
// and goes straight to stdout) stays off.
if (isProduction) {
  delete process.env.DEBUG;
}
|
||||
```
|
||||
|
||||
> **Review 修正**:原 `*.field` 只匹配一层嵌套,改为 `**.field` 深层匹配。
|
||||
> 补充了 Playwright StorageState 中 cookie value 的脱敏规则。
|
||||
> 生产环境禁用 `DEBUG=pw:*` 防止 Playwright 调试日志泄露敏感信息。
|
||||
|
||||
---
|
||||
|
||||
## 七、安全基线(Phase 1 必须落地)
|
||||
|
||||
| 措施 | 说明 |
|
||||
|------|------|
|
||||
| **默认监听 127.0.0.1** | 配置项 `HOST=127.0.0.1`,不暴露到局域网/公网 |
|
||||
| **Host header 校验** | 中间件校验 Host 头只允许 `127.0.0.1` / `localhost`,防御 DNS rebinding 攻击 |
|
||||
| **拒绝监听 0.0.0.0** | 如果用户配置 `HOST=0.0.0.0`,启动时警告并要求设置 `ALLOW_REMOTE=yes-i-understand-the-risk` |
|
||||
| **Cookie 文件权限 0600** | `fs.writeFile` 时指定 `mode: 0o600`,目录 0o700 |
|
||||
| **Cookie 原子写入** | 先写 `.tmp` 文件再 `rename`,防止崩溃导致文件损坏 |
|
||||
| **不加 CORS** | MCP 客户端走 HTTP 不走浏览器,不需要 CORS header |
|
||||
| **日志深层脱敏** | pino `**` 深层通配,token/cookie/password 永远不入日志 |
|
||||
| **错误消息脱敏** | `sanitizeErrorMessage()` 去掉文件路径、长 token,不返回堆栈 |
|
||||
| **媒体文件路径校验** | 发布图片/视频时校验路径无 `..` 穿越,限制文件大小和 MIME 类型 |
|
||||
|
||||
### 7.1 DNS Rebinding 防御
|
||||
|
||||
即使监听 `127.0.0.1`,攻击者网页仍可通过 DNS rebinding 向本地服务发送请求。
|
||||
由于浏览器对 simple POST(`Content-Type: text/plain` 等)不做 preflight,写操作会被执行。
|
||||
|
||||
```typescript
|
||||
// server/middleware.ts
|
||||
/**
 * Express middleware that rejects requests whose Host header is not the
 * loopback address or `localhost` (with or without the configured port).
 *
 * @param req  Incoming request; only `headers.host` is read.
 * @param res  Response, used to send 403 when the host is not allowed.
 * @param next Called when the host is allowed.
 */
function dnsRebindingGuard(req: Request, res: Response, next: NextFunction) {
  const hostHeader = req.headers.host?.toLowerCase();
  // Each trusted name is accepted bare and with the server port appended.
  const trusted = ['127.0.0.1', 'localhost'].flatMap((name) => [name, `${name}:${PORT}`]);

  if (hostHeader && trusted.includes(hostHeader)) {
    next();
    return;
  }

  logger.warn({ host: hostHeader }, 'DNS rebinding 请求被拦截');
  return res.status(403).json({ error: 'Forbidden' });
}
|
||||
```
|
||||
|
||||
### 7.2 REST API 认证(Phase 5 实现)
|
||||
|
||||
REST API 启用时必须加 Bearer token 认证。首次启动生成随机 token 并显示给用户。
|
||||
使用 `crypto.timingSafeEqual` 做时序安全比较。
|
||||
|
||||
### 7.3 媒体文件安全
|
||||
|
||||
```typescript
|
||||
// 发布图片/视频时的路径校验
|
||||
/**
 * Validates a media file path supplied for publishing and returns it resolved
 * to an absolute path.
 *
 * Traversal is detected per path segment rather than by substring, so a file
 * name such as `photo..final.jpg` is accepted while `..` segments (including
 * variants padded with dots or whitespace) are rejected. Both `/` and `\` are
 * treated as separators.
 *
 * This function does not confine the path to a base directory; size and MIME
 * checks are listed by the design as follow-up steps and are not done here.
 *
 * @param filePath Path as supplied by the caller.
 * @returns The absolute, normalized path.
 * @throws Error when the path is empty or contains a parent-directory segment.
 */
function validateMediaPath(filePath: string): string {
  if (filePath.length === 0) throw new Error('媒体路径不能为空');

  // A segment made only of dots/whitespace that contains ".." climbs out of
  // its directory (or can be normalized into one that does).
  const climbsUp = /^[.\s]*\.\.[.\s]*$/;
  const segments = filePath.split(/[\\/]+/);
  if (segments.some((segment) => climbsUp.test(segment))) {
    throw new Error('路径穿越不允许');
  }

  // TODO: validate file size (images 20MB, videos 500MB).
  // TODO: validate the MIME type via magic bytes instead of trusting the extension.
  return path.resolve(filePath);
}
|
||||
```
|
||||
|
||||
> **Review 说明**:DNS rebinding 是安全审计提出的高优先级风险,实现成本极低(一个中间件),Phase 1 必须加。
|
||||
> Bearer token 认证延迟到 Phase 5(REST API 实现时),且只作用于 REST API 路由。MCP 端点(`/sse`、`/messages`)同样走 HTTP,但不经过 Bearer token 认证,仅依赖 127.0.0.1 监听和 Host header 校验保护。
|
||||
|
||||
---
|
||||
|
||||
## 八、实现计划
|
||||
|
||||
### Phase 1: 骨架 + 基础设施
|
||||
|
||||
1. 初始化项目(pnpm, tsconfig, tsup, .gitignore, CLAUDE.md, .env.example)
|
||||
2. `src/config/index.ts` — 环境变量(PORT, HOST=127.0.0.1, HEADLESS, BROWSER_BIN)+ 启动时校验 HOST(拒绝 0.0.0.0 除非显式确认)
|
||||
3. `src/utils/logger.ts` — pino + `**` 深层 redact 脱敏 + 自定义错误序列化
|
||||
4. `src/utils/errors.ts` — withErrorHandling 错误包装 + 错误分类(ErrorCategory)+ 消息脱敏
|
||||
5. `src/browser/manager.ts` — BrowserManager(串行队列 + 分级超时 + timer 清理 + 启动锁 + 背压 + acquirePage 幂等释放 + 崩溃恢复 + drain)
|
||||
6. `src/cookie/store.ts` — CookieStore(权限 0600 + 原子写入)
|
||||
7. `src/server/app.ts` — AppServer(Express + MCP,不加 CORS)
|
||||
8. `src/server/middleware.ts` — DNS rebinding 防御中间件(Host header 校验)+ 优雅关闭 503 中间件
|
||||
9. `src/index.ts` — 入口 + SIGINT/SIGTERM 优雅关闭(先 drain 队列 → 关浏览器 → 关 HTTP → flush 日志)+ 全局 `unhandledRejection` / `uncaughtException` 处理
|
||||
10. `src/utils/downloader.ts` — 图片下载(发布功能的前置依赖)+ 媒体路径校验
|
||||
11. `/health` 端点 — 检查 Browser 存活、队列深度、内存使用(从 Phase 5 提前到 Phase 1)
|
||||
12. **测试**:BrowserManager 单元测试(队列串行、背压拒绝、超时、启动锁)+ CookieStore 单元测试(读/写/删/原子写入)+ 错误分类单元测试
|
||||
|
||||
### Phase 2: 小红书 — 登录
|
||||
|
||||
13. `selectors.ts` + `types.ts` + `schemas.ts`
|
||||
14. `login.ts` — 扫码登录、状态检查(使用 acquirePage,release 幂等)
|
||||
15. `actions.ts` 初始化 + `index.ts` 注册 3 个登录工具
|
||||
16. **测试**:zod schema 校验测试 + MCP Inspector 端到端验证登录流程
|
||||
|
||||
### Phase 3: 小红书 — 内容浏览
|
||||
|
||||
17. `feeds.ts` — 首页 Feed 列表(`__INITIAL_STATE__` 提取)
|
||||
18. `search.ts` — 搜索 + 筛选
|
||||
19. `feed-detail.ts` — 笔记详情 + 评论加载(scroll_speed 等内部默认配置)
|
||||
20. `user-profile.ts` — 用户主页
|
||||
21. **测试**:`__INITIAL_STATE__` JSON 解析单元测试 + MCP Inspector 验证 4 个浏览工具
|
||||
|
||||
### Phase 4: 小红书 — 发布 + 互动
|
||||
|
||||
22. `publish.ts` — 图文发布(依赖 downloader + 媒体路径校验)
|
||||
23. `publish-video.ts` — 视频发布
|
||||
24. `comment.ts` — 评论 + 回复
|
||||
25. `interaction.ts` — 点赞 / 收藏
|
||||
26. **测试**:downloader 单元测试 + 写操作手动验证(使用测试账号)
|
||||
|
||||
### Phase 5: 工程化
|
||||
|
||||
27. REST API 路由(调用 actions 层,复用 MCP 的业务逻辑)+ Bearer token 认证 + 限流
|
||||
28. Docker(见下方 Docker 配置要求)
|
||||
29. README
|
||||
|
||||
---
|
||||
|
||||
## 九、测试策略
|
||||
|
||||
### 9.1 原则
|
||||
|
||||
这个项目的核心是浏览器自动化,大部分逻辑依赖真实页面交互。
|
||||
**不 mock Playwright Page 来测试页面操作**——成本高、收益低、选择器一变全白写。
|
||||
只对**可独立测试的纯逻辑**写单元测试,浏览器交互靠端到端手动验证。
|
||||
|
||||
### 9.2 测试框架
|
||||
|
||||
| 工具 | 用途 |
|
||||
|------|------|
|
||||
| **vitest** | 单元测试(快,原生 TS 支持,兼容 Jest API) |
|
||||
| **MCP Inspector** | MCP 工具端到端验证(官方调试工具) |
|
||||
|
||||
devDependencies 补充:`"vitest": "^3.0.0"`
|
||||
|
||||
### 9.3 单元测试范围
|
||||
|
||||
| 模块 | 测什么 | 怎么测 |
|
||||
|------|--------|--------|
|
||||
| **BrowserManager** | 串行队列(并发调用按序执行)、超时(超时后正确抛错 + timer 被清理)、启动锁(并发 ensureBrowser 只启动一次)、背压(超出 MAX_QUEUE_DEPTH 拒绝)、崩溃恢复(isConnected=false 时重建) | mock `chromium.launch` 和 `Browser` 接口 |
|
||||
| **CookieStore** | 读/写/删、文件权限 0600、目录自动创建、原子写入(先 tmp 再 rename) | 真实文件系统(临时目录) |
|
||||
| **withErrorHandling** | 正常返回透传、异常转 `isError: true`、错误分类正确性、消息脱敏(路径/token 被替换) | 纯函数测试 |
|
||||
| **classifyError** | TIMEOUT / AUTH_REQUIRED / SELECTOR_NOT_FOUND / NETWORK / INTERNAL 各类型识别 | 构造不同错误消息测试 |
|
||||
| **sanitizeErrorMessage** | 文件路径替换、URL 替换、长 hex 串替换、长度截断 | 纯函数测试 |
|
||||
| **downloader** | URL 下载、本地路径校验(无 `..` 穿越)、非图片类型拒绝、文件大小限制 | mock HTTP 请求 |
|
||||
| **数据解析** | `__INITIAL_STATE__` JSON → Feed/Comment 类型的转换 | 固定 JSON fixture 输入,断言输出结构 |
|
||||
| **zod schema** | 必填字段缺失报错、可选字段默认值、枚举值校验 | 直接调用 `.parse()` / `.safeParse()` |
|
||||
| **dnsRebindingGuard** | 合法 Host 放行、非法 Host 返回 403 | 构造 mock request 测试 |
|
||||
|
||||
### 9.4 端到端验证(手动)
|
||||
|
||||
每个 Phase 结束后,用 MCP Inspector 连接服务,逐个调用该阶段的 MCP 工具:
|
||||
|
||||
| Phase | 验证内容 |
|
||||
|-------|---------|
|
||||
| Phase 2 | `xhs_check_login` → `xhs_get_login_qrcode`(扫码)→ `xhs_check_login`(确认已登录)→ `xhs_delete_cookies` → `xhs_check_login`(确认已登出) |
|
||||
| Phase 3 | `xhs_list_feeds`(有数据返回)→ `xhs_search`(关键词+筛选)→ `xhs_get_feed_detail`(取第一条的详情)→ `xhs_get_user_profile` |
|
||||
| Phase 4 | `xhs_publish_image`(测试账号发一条)→ `xhs_post_comment` → `xhs_like` → `xhs_favorite`。**写操作用测试账号,不用主力账号** |
|
||||
|
||||
### 9.5 不做的
|
||||
|
||||
- **不 mock Page 测页面操作**:选择器绑定真实 DOM,mock 出来的测试没有意义
|
||||
- **不做 CI 自动化端到端测试**:依赖真实浏览器 + 真实平台登录态,不适合 CI
|
||||
- **不追求覆盖率指标**:只测有价值的纯逻辑,不为凑数写无意义的测试
|
||||
|
||||
---
|
||||
|
||||
## 十、关键设计决策
|
||||
|
||||
### 10.1 为什么不做统一 Platform 接口?
|
||||
|
||||
各平台业务模型差异大,强行统一 = 大量 `NotSupported` + 失去类型安全。
|
||||
**插件模式**:共享基础设施,业务逻辑完全独立。
|
||||
|
||||
### 10.2 BrowserContext 隔离 + 串行队列
|
||||
|
||||
一个 Browser 进程 + 每个平台一个 BrowserContext + per-platform 操作串行化。
|
||||
- 共享进程,节省资源
|
||||
- Cookie/Storage 天然隔离
|
||||
- 串行避免多 Page 互相干扰(同一平台同时只有一个操作在执行)
|
||||
|
||||
> **读写分离队列(暂不实现)**:性能工程师建议读操作(搜索、浏览)可以并发执行,只有写操作需要串行。
|
||||
> 理论上正确,但当前 MCP 工具由 AI 顺序调用,极少出现并发场景。保持简单串行队列,
|
||||
> 如果未来确实遇到队列积压问题,再升级为读写锁。不过早优化。
|
||||
|
||||
> **进程级隔离(暂不实现)**:性能工程师建议每个平台一个浏览器进程,隔离崩溃影响。
|
||||
> 当前只有一个平台,一个进程足够。每多一个进程 +300MB 内存。等 3+ 平台时再考虑。
|
||||
|
||||
### 10.3 登录流程为什么不用 withPage?
|
||||
|
||||
`withPage` 是"用完即关",但扫码登录需要:
|
||||
1. 打开页面获取二维码 → 返回给 AI
|
||||
2. 用户扫码(页面保持打开)
|
||||
3. 扫码成功 → 保存 Cookie → 关闭页面
|
||||
|
||||
因此登录使用 `acquirePage()` + 手动 release,其他操作用 `withPage()`。
|
||||
`release` 函数是幂等的(内部 `released` 标志位),防止 setTimeout 和 finally 双重触发。
|
||||
|
||||
### 10.4 zod 一鱼两吃
|
||||
|
||||
- MCP 工具参数 schema(运行时校验)
|
||||
- TypeScript 类型推导(`z.infer`)
|
||||
- MCP SDK v1.x `server.tool()` 原生支持 zod shape
|
||||
|
||||
### 10.5 MCP 工具前缀 `xhs_`
|
||||
|
||||
简短、AI 易识别。后续平台:`xhh_`(小黑盒)、`bili_`(B站)、`wb_`(微博)。
|
||||
|
||||
### 10.6 为什么不在 Phase 1 加 Cookie 加密?
|
||||
|
||||
安全审计建议用 AES-256-GCM 加密 cookie 文件并集成 OS keychain。
|
||||
当前安全模型(文件权限 0600 + 监听 127.0.0.1 + Host 校验)对本地自动化工具足够。
|
||||
加密引入密钥管理复杂度(密钥存哪里?),收益与成本不匹配。
|
||||
如果未来需要多用户共享机器或远程部署,再引入加密。
|
||||
|
||||
### 10.7 为什么不做选择器 fallback?
|
||||
|
||||
架构师建议为每个 CSS 选择器准备 fallback 备选。
|
||||
但选择器失效时通常整个页面结构都变了,fallback 大概率也失效。
|
||||
保持简单的 `selectors.ts` 集中管理,发现失效时快速修复即可。
|
||||
|
||||
---
|
||||
|
||||
## 十一、风险和注意事项
|
||||
|
||||
1. **CSS 选择器失效** — 集中到 `selectors.ts` 便于快速修复
|
||||
2. **反爬升级** — rebrowser-playwright 修复底层检测点,比 JS 层 stealth 更彻底,但仍需持续跟进
|
||||
3. **浏览器资源** — Playwright 进程 ~300MB,Docker 镜像较大
|
||||
4. **Node.js 部署** — Docker 是推荐方式
|
||||
5. **浏览器崩溃影响** — 所有平台共享一个 Browser 进程,崩溃时全部 context 丢失。`browser.on('disconnected')` 主动清理状态,下次操作自动重建
|
||||
6. **Cookie 过期** — 平台可能随时吊销 session。当前无主动检测机制,依赖操作失败时的错误分类(`AUTH_REQUIRED`)提示 AI 重新登录
|
||||
7. **DNS rebinding 攻击** — 即使监听 localhost,攻击者网页可通过 DNS rebinding 发送 POST 请求触发写操作。Phase 1 用 Host header 校验中间件防御
|
||||
8. **Docker /dev/shm** — 必须配置 `shm_size: 1gb`,否则 Chromium 会因共享内存不足 SIGBUS 崩溃
|
||||
9. **长操作队列阻塞** — 视频发布可能耗时 1-5 分钟,期间同平台其他操作全部排队等待。已用分级超时缓解(快操作 15s 超时,不会被慢操作的默认超时卡住)
|
||||
|
||||
---
|
||||
|
||||
## 十二、Docker 配置要求
|
||||
|
||||
```yaml
|
||||
# deploy/docker-compose.yml
|
||||
services:
|
||||
social-mcp:
|
||||
build: .
|
||||
ports:
|
||||
- "127.0.0.1:3000:3000" # 必须带 127.0.0.1 前缀,不要省略
|
||||
shm_size: '1gb' # 必须:Chromium 需要足够的共享内存,默认 64MB 会导致 SIGBUS 崩溃
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 2g # 至少 2x 预期峰值
|
||||
cpus: '2.0' # Chromium 多进程架构需要至少 2 核
|
||||
reservations:
|
||||
memory: 1g
|
||||
cpus: '1.0'
|
||||
security_opt:
|
||||
- no-new-privileges:true
|
||||
cap_drop:
|
||||
- ALL
|
||||
read_only: true
|
||||
tmpfs:
|
||||
- /tmp:size=512m
|
||||
volumes:
|
||||
- cookie-data:/home/appuser/.social-mcp
|
||||
environment:
|
||||
- NODE_ENV=production
|
||||
|
||||
volumes:
|
||||
cookie-data:
|
||||
```
|
||||
|
||||
```dockerfile
|
||||
# Dockerfile 要点
|
||||
# - 非 root 用户运行(UID 1001)
|
||||
# - 多阶段构建,不把 .env 或密钥文件复制进镜像层
|
||||
# - HEALTHCHECK 检查 /health 端点
|
||||
HEALTHCHECK --interval=30s --timeout=5s --retries=3 \
|
||||
CMD node -e "fetch('http://localhost:3000/health').then(r => process.exit(r.ok ? 0 : 1))"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 十三、依赖清单
|
||||
|
||||
```json
|
||||
{
|
||||
"dependencies": {
|
||||
"@modelcontextprotocol/sdk": "^1.27.0",
|
||||
"rebrowser-playwright": "^1.52.0",
|
||||
"express": "^4.21.0",
|
||||
"pino": "^9.0.0",
|
||||
"zod": "^3.25.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"typescript": "^5.7.0",
|
||||
"tsup": "^8.0.0",
|
||||
"vitest": "^3.0.0",
|
||||
"pino-pretty": "^13.0.0",
|
||||
"@types/express": "^5.0.0",
|
||||
"@types/node": "^22.0.0"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 附录 A:Review 修正清单
|
||||
|
||||
本 Plan 经过四方专业评审(架构师、安全审计、后端架构师、性能工程师),以下是已采纳的修正:
|
||||
|
||||
### 已整合进 Plan 的修正(P0/P1)
|
||||
|
||||
| 优先级 | 问题 | 来源 | 对应章节 |
|
||||
|--------|------|------|---------|
|
||||
| **P0** | 超时 timer 泄漏 + fn 在 page 关闭后继续运行 | 全员 | 6.1 BrowserManager |
|
||||
| **P0** | `ensureBrowser()` 并发竞争可能启动多个浏览器 | 后端+性能 | 6.1 launchPromise |
|
||||
| **P0** | `release()` 双重调用竞态 | 后端+架构 | 6.1 acquirePage + 6.2 登录流程 |
|
||||
| **P0** | `withErrorHandling` 缺少 async 关键字 | 后端 | 6.4 错误处理 |
|
||||
| **P1** | 队列无深度限制(无背压) | 全员 | 6.1 MAX_QUEUE_DEPTH |
|
||||
| **P1** | 缺少错误分类体系 | 架构+后端 | 6.4 ErrorCategory |
|
||||
| **P1** | 错误消息泄露文件路径 | 安全 | 6.4 sanitizeErrorMessage |
|
||||
| **P1** | DNS rebinding 攻击 | 安全 | 7.1 dnsRebindingGuard |
|
||||
| **P1** | 日志 redact 只匹配一层 | 安全 | 6.5 `**` 深层通配 |
|
||||
| **P1** | Health check 在 Phase 5 太晚 | 架构+性能 | 8 Phase 1 第 11 项 |
|
||||
| **P1** | Docker `shm_size` 缺失 | 性能 | 十二 Docker 配置 |
|
||||
| **P1** | 浏览器崩溃后 contexts Map 残留 | 后端+性能 | 6.1 disconnected 事件 |
|
||||
|
||||
### 已整合但标记为延迟实现的
|
||||
|
||||
| 问题 | 来源 | 决定 | 对应章节 |
|
||||
|------|------|------|---------|
|
||||
| PlatformPlugin 生命周期钩子 | 架构 | 可选钩子,第二个平台时评估 | 3.2 |
|
||||
| REST API Bearer token | 安全 | Phase 5 实现 | 7.2 |
|
||||
| 媒体文件路径穿越 | 安全 | Phase 4 publish 时实现 | 7.3 |
|
||||
| Cookie 原子写入 | 后端 | Phase 1 CookieStore 实现 | 6.3 |
|
||||
| 分级超时 | 性能 | Phase 1 BrowserManager 实现 | 6.1 |
|
||||
|
||||
### 评审后明确不采纳的
|
||||
|
||||
| 建议 | 来源 | 不采纳原因 | 对应章节 |
|
||||
|------|------|-----------|---------|
|
||||
| Cookie 加密 + OS keychain | 安全 | 本地工具,文件权限已足够,密钥管理增加复杂度 | 10.6 |
|
||||
| 选择器 fallback 机制 | 架构 | 选择器失效时通常整页变化,fallback 无意义 | 10.7 |
|
||||
| 读写分离队列 | 性能 | AI 顺序调用极少并发,过早优化 | 10.2 |
|
||||
| 进程级平台隔离 | 性能 | 只有一个平台,+300MB/进程 成本过高 | 10.2 |
|
||||
| 内存安全(Buffer 清零) | 安全 | JS 字符串不可变,Node.js 中不实用 | — |
|
||||
@@ -0,0 +1,247 @@
|
||||
# Social MCP
|
||||
|
||||
Multi-platform social media automation service that exposes browser-based actions as both MCP (Model Context Protocol) tools and a REST API. Currently supports **Xiaohongshu** (Little Red Book).
|
||||
|
||||
## Features
|
||||
|
||||
- **13 MCP tools** for Xiaohongshu: login management, content browsing, publishing, and interactions
|
||||
- **REST API** with Bearer token authentication and rate limiting
|
||||
- **Browser automation** via rebrowser-playwright with anti-detection patches
|
||||
- **Cookie persistence** with file-based storage (0600 permissions, atomic writes)
|
||||
- **Security**: DNS rebinding protection, Host header validation, error message sanitization, log redaction
|
||||
- **Docker support** with hardened configuration (non-root user, read-only filesystem, resource limits)
|
||||
- **Plugin architecture** for adding new platforms
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- Node.js >= 22.0.0
|
||||
- pnpm
|
||||
|
||||
### Install and Run
|
||||
|
||||
```bash
|
||||
# Install dependencies
|
||||
pnpm install
|
||||
|
||||
# Install Playwright browsers (first time only)
|
||||
npx playwright install chromium
|
||||
|
||||
# Build
|
||||
pnpm build
|
||||
|
||||
# Start the server
|
||||
pnpm start
|
||||
```
|
||||
|
||||
The server starts on `http://127.0.0.1:3000` by default. A REST API Bearer token is printed to the console on first startup and saved to `~/.social-mcp/.api-token`.
|
||||
|
||||
### Development
|
||||
|
||||
```bash
|
||||
# Watch mode (rebuilds on file changes)
|
||||
pnpm dev
|
||||
|
||||
# Type check without emitting
|
||||
pnpm lint
|
||||
|
||||
# Run tests
|
||||
pnpm test
|
||||
```
|
||||
|
||||
## MCP Integration
|
||||
|
||||
### Claude Desktop
|
||||
|
||||
Add the following to your Claude Desktop configuration file (`claude_desktop_config.json`):
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"social-mcp": {
|
||||
"url": "http://127.0.0.1:3000/sse"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Available MCP Tools
|
||||
|
||||
| Tool | Description |
|
||||
|------|-------------|
|
||||
| `xhs_check_login` | Check Xiaohongshu login status |
|
||||
| `xhs_get_login_qrcode` | Get login QR code for phone scanning |
|
||||
| `xhs_delete_cookies` | Delete cookies and reset login session |
|
||||
| `xhs_list_feeds` | Get explore page recommended feed list |
|
||||
| `xhs_search` | Search notes by keyword with filters |
|
||||
| `xhs_get_feed_detail` | Get note detail with content, images, stats, comments |
|
||||
| `xhs_get_user_profile` | Get user profile with bio, stats, recent notes |
|
||||
| `xhs_publish_image` | Publish an image note |
|
||||
| `xhs_publish_video` | Publish a video note |
|
||||
| `xhs_post_comment` | Post a comment on a note |
|
||||
| `xhs_reply_comment` | Reply to a comment |
|
||||
| `xhs_like` | Like or unlike a note |
|
||||
| `xhs_favorite` | Favorite or unfavorite a note |
|
||||
|
||||
## REST API
|
||||
|
||||
All REST endpoints require a `Bearer` token in the `Authorization` header. The token is generated on first startup and printed to the console.
|
||||
|
||||
```bash
|
||||
# Example: check login status
|
||||
curl -H "Authorization: Bearer <token>" http://127.0.0.1:3000/api/xhs/login/status
|
||||
|
||||
# Example: search notes
|
||||
curl -X POST -H "Authorization: Bearer <token>" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"keyword": "travel", "filters": {"sort": "popularity_descending"}}' \
|
||||
http://127.0.0.1:3000/api/xhs/search
|
||||
```
|
||||
|
||||
### Endpoints
|
||||
|
||||
| Method | Path | Description | Rate Limit |
|
||||
|--------|------|-------------|------------|
|
||||
| `GET` | `/api/xhs/login/status` | Check login status | 60/min |
|
||||
| `GET` | `/api/xhs/login/qrcode` | Get login QR code | 60/min |
|
||||
| `DELETE` | `/api/xhs/login/cookies` | Delete cookies | 10/min |
|
||||
| `GET` | `/api/xhs/feeds` | Get recommended feeds | 60/min |
|
||||
| `POST` | `/api/xhs/search` | Search notes | 60/min |
|
||||
| `POST` | `/api/xhs/feeds/detail` | Get note detail | 60/min |
|
||||
| `POST` | `/api/xhs/user/profile` | Get user profile | 60/min |
|
||||
| `POST` | `/api/xhs/publish/image` | Publish image note | 10/min |
|
||||
| `POST` | `/api/xhs/publish/video` | Publish video note | 10/min |
|
||||
| `POST` | `/api/xhs/comment` | Post a comment | 10/min |
|
||||
| `POST` | `/api/xhs/comment/reply` | Reply to a comment | 10/min |
|
||||
| `POST` | `/api/xhs/like` | Like/unlike a note | 10/min |
|
||||
| `POST` | `/api/xhs/favorite` | Favorite/unfavorite a note | 10/min |
|
||||
|
||||
### Response Format
|
||||
|
||||
All REST responses follow a consistent JSON format:
|
||||
|
||||
```json
|
||||
// Success
|
||||
{
|
||||
"success": true,
|
||||
"data": { ... }
|
||||
}
|
||||
|
||||
// Error
|
||||
{
|
||||
"success": false,
|
||||
"error": {
|
||||
"code": "VALIDATION_ERROR",
|
||||
"message": "keyword: Required"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Other Endpoints (no auth required)
|
||||
|
||||
| Method | Path | Description |
|
||||
|--------|------|-------------|
|
||||
| `GET` | `/health` | Health check (memory, uptime, plugin status) |
|
||||
| `GET` | `/sse` | MCP SSE transport |
|
||||
| `POST` | `/messages` | MCP JSON-RPC messages |
|
||||
|
||||
## Docker Deployment
|
||||
|
||||
### Using Docker Compose (recommended)
|
||||
|
||||
```bash
|
||||
cd deploy
|
||||
docker compose up -d
|
||||
|
||||
# View logs
|
||||
docker compose logs -f
|
||||
|
||||
# The API token is printed in the logs on first start
|
||||
docker compose logs social-mcp | grep "Bearer Token"
|
||||
```
|
||||
|
||||
### Using Docker directly
|
||||
|
||||
```bash
|
||||
# Build the image
|
||||
docker build -t social-mcp .
|
||||
|
||||
# Run with required settings
|
||||
docker run -d \
|
||||
--name social-mcp \
|
||||
-p 127.0.0.1:3000:3000 \
|
||||
--shm-size=1gb \
|
||||
--memory=2g \
|
||||
--cpus=2.0 \
|
||||
--security-opt=no-new-privileges:true \
|
||||
--cap-drop=ALL \
|
||||
--read-only \
|
||||
--tmpfs /tmp:size=512m \
|
||||
-v social-mcp-data:/home/appuser/.social-mcp \
|
||||
social-mcp
|
||||
```
|
||||
|
||||
**Important**: The `--shm-size=1gb` flag is required. Chromium uses `/dev/shm` for shared memory and the default 64MB causes crashes.
|
||||
|
||||
## Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `PORT` | `3000` | HTTP server port |
|
||||
| `HOST` | `127.0.0.1` | Bind address (`0.0.0.0` requires `ALLOW_REMOTE`) |
|
||||
| `HEADLESS` | `true` | Run browser in headless mode |
|
||||
| `BROWSER_BIN` | (auto) | Custom Chromium executable path |
|
||||
| `LOG_LEVEL` | `info` | Pino log level (`debug`, `info`, `warn`, `error`) |
|
||||
| `NODE_ENV` | `development` | Environment (`production` disables pretty logs) |
|
||||
| `COOKIE_DIR` | `~/.social-mcp` | Directory for cookie and token storage |
|
||||
| `MAX_QUEUE_DEPTH` | `10` | Max pending operations per platform queue |
|
||||
| `ALLOW_REMOTE` | (unset) | Set to `yes-i-understand-the-risk` to allow `HOST=0.0.0.0` |
|
||||
|
||||
## Project Structure
|
||||
|
||||
```
|
||||
social-mcp/
|
||||
├── package.json
|
||||
├── tsconfig.json
|
||||
├── tsup.config.ts
|
||||
├── Dockerfile
|
||||
├── deploy/
|
||||
│ └── docker-compose.yml
|
||||
├── src/
|
||||
│ ├── index.ts # Entry point: bootstrap, plugin registration, graceful shutdown
|
||||
│ ├── server/
|
||||
│ │ ├── app.ts # AppServer: Express + MCP lifecycle
|
||||
│ │ └── middleware.ts # DNS rebinding guard, bearer auth, rate limiter, error handler
|
||||
│ ├── browser/
|
||||
│ │ └── manager.ts # BrowserManager: browser lifecycle, serial queues, backpressure
|
||||
│ ├── cookie/
|
||||
│ │ └── store.ts # CookieStore: per-platform cookie persistence (0600, atomic writes)
|
||||
│ ├── config/
|
||||
│ │ └── index.ts # Environment-based configuration
|
||||
│ ├── utils/
|
||||
│ │ ├── logger.ts # Pino logger with deep redaction
|
||||
│ │ ├── errors.ts # Error classification, sanitization, MCP error wrapper
|
||||
│ │ └── downloader.ts # Media file download and path validation
|
||||
│ └── platforms/
|
||||
│ └── xiaohongshu/
|
||||
│ ├── index.ts # PlatformPlugin: MCP tool + REST route registration
|
||||
│ ├── routes.ts # REST API route handlers
|
||||
│ ├── schemas.ts # Zod schemas for tool/API parameter validation
|
||||
│ ├── types.ts # Domain types (Feed, Comment, UserProfile, etc.)
|
||||
│ ├── selectors.ts # CSS selector constants
|
||||
│ ├── login.ts # Login management (QR code, status check)
|
||||
│ ├── feeds.ts # Explore page feed extraction
|
||||
│ ├── search.ts # Search with filters
|
||||
│ ├── feed-detail.ts # Note detail + comment loading
|
||||
│ ├── user-profile.ts # User profile extraction
|
||||
│ ├── publish.ts # Image note publishing
|
||||
│ ├── publish-video.ts # Video note publishing
|
||||
│ ├── comment.ts # Comment and reply posting
|
||||
│ └── interaction.ts # Like and favorite toggling
|
||||
└── tests/
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
ISC
|
||||
@@ -0,0 +1,35 @@
|
||||
services:
|
||||
social-mcp:
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: Dockerfile
|
||||
ports:
|
||||
- "127.0.0.1:3000:3000"
|
||||
shm_size: '1gb'
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 2g
|
||||
cpus: '2.0'
|
||||
reservations:
|
||||
memory: 1g
|
||||
cpus: '1.0'
|
||||
security_opt:
|
||||
- no-new-privileges:true
|
||||
cap_drop:
|
||||
- ALL
|
||||
read_only: true
|
||||
tmpfs:
|
||||
- /tmp:size=512m
|
||||
volumes:
|
||||
- cookie-data:/home/appuser/.social-mcp
|
||||
environment:
|
||||
- NODE_ENV=production
|
||||
- HOST=0.0.0.0
|
||||
- PORT=3000
|
||||
- HEADLESS=true
|
||||
- ALLOW_REMOTE=yes-i-understand-the-risk
|
||||
restart: unless-stopped
|
||||
|
||||
volumes:
|
||||
cookie-data:
|
||||
@@ -0,0 +1,51 @@
|
||||
{
|
||||
"name": "social-mcp",
|
||||
"version": "0.1.0",
|
||||
"description": "Multi-platform social media automation MCP service",
|
||||
"type": "module",
|
||||
"main": "dist/index.js",
|
||||
"bin": {
|
||||
"social-mcp": "dist/index.js"
|
||||
},
|
||||
"scripts": {
|
||||
"build": "tsup",
|
||||
"dev": "tsup --watch",
|
||||
"start": "node dist/index.js",
|
||||
"test": "vitest run",
|
||||
"test:watch": "vitest",
|
||||
"lint": "tsc --noEmit"
|
||||
},
|
||||
"keywords": [
|
||||
"mcp",
|
||||
"social-media",
|
||||
"automation",
|
||||
"playwright",
|
||||
"xiaohongshu"
|
||||
],
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"packageManager": "pnpm@10.25.0",
|
||||
"engines": {
|
||||
"node": ">=22.0.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"@modelcontextprotocol/sdk": "^1.27.0",
|
||||
"rebrowser-playwright": "^1.52.0",
|
||||
"express": "^4.21.0",
|
||||
"pino": "^9.0.0",
|
||||
"zod": "^3.25.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"typescript": "^5.7.0",
|
||||
"tsup": "^8.0.0",
|
||||
"vitest": "^3.0.0",
|
||||
"pino-pretty": "^13.0.0",
|
||||
"@types/express": "^5.0.0",
|
||||
"@types/node": "^22.0.0"
|
||||
},
|
||||
"pnpm": {
|
||||
"onlyBuiltDependencies": [
|
||||
"esbuild"
|
||||
]
|
||||
}
|
||||
}
|
||||
Generated
+2572
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,374 @@
|
||||
import {
|
||||
chromium,
|
||||
type Browser,
|
||||
type BrowserContext,
|
||||
type Page,
|
||||
} from 'rebrowser-playwright';
|
||||
|
||||
import { config } from '../config/index.js';
|
||||
import { cookieStore } from '../cookie/store.js';
|
||||
import { logger } from '../utils/logger.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** How long acquirePage() waits before auto-releasing the page (5 minutes). */
|
||||
const ACQUIRE_SAFETY_TIMEOUT_MS = 5 * 60_000;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// BrowserManager
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export class BrowserManager {
|
||||
// -- State ----------------------------------------------------------------
|
||||
|
||||
private browser: Browser | null = null;
|
||||
private contexts = new Map<string, BrowserContext>();
|
||||
private queues = new Map<string, Promise<void>>();
|
||||
private queueDepths = new Map<string, number>();
|
||||
|
||||
/**
|
||||
* Mutex-style promise that prevents concurrent browser launches.
|
||||
* While a launch is in-flight every caller awaits the same promise.
|
||||
*/
|
||||
private launchPromise: Promise<Browser> | null = null;
|
||||
|
||||
// -- Public API -----------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Execute `fn` on a fresh page inside the platform-specific context.
|
||||
*
|
||||
* Operations are serialised per-platform through a promise chain so that
|
||||
* at most one page is active per platform at any time. Back-pressure is
|
||||
* enforced via `config.maxQueueDepth`.
|
||||
*
|
||||
* @param platform - Platform identifier (e.g. "twitter", "xiaohongshu").
|
||||
* @param fn - Async callback that receives the page.
|
||||
* @param timeoutMs - Optional timeout override (defaults to the
|
||||
* `operationTimeouts.default` value from config).
|
||||
* @returns The value returned by `fn`.
|
||||
*/
|
||||
async withPage<T>(
  platform: string,
  fn: (page: Page) => Promise<T>,
  timeoutMs?: number,
): Promise<T> {
  // Back-pressure: refuse new work once this platform already has
  // `config.maxQueueDepth` operations queued or running.
  const depthBefore = this.queueDepths.get(platform) ?? 0;
  if (depthBefore >= config.maxQueueDepth) {
    throw new Error(
      `Queue full for platform "${platform}" (depth=${depthBefore}, ` +
        `max=${config.maxQueueDepth}). Try again later.`,
    );
  }
  this.queueDepths.set(platform, depthBefore + 1);

  // Caller override first, then the configured default, then 60 seconds.
  const budgetMs = timeoutMs ?? config.operationTimeouts['default'] ?? 60_000;

  // The unit of work: open a page, run `fn` against it, always close the page.
  const runOnFreshPage = async (): Promise<T> => {
    const browser = await this.ensureBrowser();

    // The browser may have gone away while this task sat in the queue.
    if (!browser.isConnected()) {
      throw new Error('Browser disconnected while waiting in queue');
    }

    const context = await this.getContext(platform);
    const page = await context.newPage();
    page.setDefaultTimeout(budgetMs);
    page.setDefaultNavigationTimeout(budgetMs);

    try {
      // Whichever settles first wins: the caller's function or the hard
      // timeout produced by createTimeout().
      return await Promise.race<T>([
        fn(page),
        this.createTimeout<T>(budgetMs, platform),
      ]);
    } finally {
      // A failure to close the page is logged, never propagated.
      await page.close().catch((err: unknown) => {
        logger.warn({ err, platform }, 'Failed to close page');
      });
    }
  };

  // Append the work to the tail of this platform's promise chain so that
  // operations for one platform run strictly one after another.
  const tail = this.queues.get(platform) ?? Promise.resolve();
  const task: Promise<T> = tail.then(runOnFreshPage);

  // The queue itself must never reject, otherwise one failed operation would
  // poison every later one. The caller still sees the real outcome via `task`.
  const ignoreOutcome = (): void => {};
  const settled = task.then(ignoreOutcome, ignoreOutcome);

  // Give the queue slot back once the task has finished, whatever the outcome.
  void settled.finally(() => {
    const remaining = (this.queueDepths.get(platform) ?? 1) - 1;
    if (remaining <= 0) {
      this.queueDepths.delete(platform);
      return;
    }
    this.queueDepths.set(platform, remaining);
  });

  this.queues.set(platform, settled);

  return task;
}
|
||||
|
||||
/**
|
||||
* Acquire a page that the caller manages manually (e.g. for interactive
|
||||
* login flows). The caller **must** call `release()` when finished.
|
||||
*
|
||||
* A safety-net timer auto-releases the page after 5 minutes to prevent
|
||||
* resource leaks if the caller forgets.
|
||||
*
|
||||
* @param platform - Platform identifier.
|
||||
* @returns Object with `page` and an idempotent `release` function.
|
||||
*/
|
||||
async acquirePage(
  platform: string,
): Promise<{ page: Page; release: () => Promise<void> }> {
  await this.ensureBrowser();
  const context = await this.getContext(platform);
  const page = await context.newPage();

  // Set once the page has been handed back; makes release() idempotent.
  let released = false;

  // Safety net: hand the page back automatically if the caller never does.
  // `release` is only invoked when the timer fires, long after it is defined.
  const safetyTimer = setTimeout(() => {
    if (released) return;
    logger.warn(
      { platform },
      `acquirePage safety timeout: auto-releasing page after ${ACQUIRE_SAFETY_TIMEOUT_MS}ms`,
    );
    void release();
  }, ACQUIRE_SAFETY_TIMEOUT_MS);

  // The pending timer must not keep the Node.js process alive on its own.
  if (typeof safetyTimer === 'object' && 'unref' in safetyTimer) {
    safetyTimer.unref();
  }

  const release = async (): Promise<void> => {
    // Second and later calls (e.g. the timer and a `finally` both firing)
    // are no-ops.
    if (released) {
      return;
    }
    released = true;
    clearTimeout(safetyTimer);
    try {
      await page.close();
    } catch (err: unknown) {
      // Closing is best-effort: log and carry on.
      logger.warn({ err, platform }, 'Failed to close acquired page');
    }
  };

  return { page, release };
}
|
||||
|
||||
/**
|
||||
* Save the current cookie / storage state of a platform's browser context
|
||||
* to disk via the CookieStore.
|
||||
*
|
||||
* @param platform - Platform identifier whose context should be persisted.
|
||||
*/
|
||||
async saveCookies(platform: string): Promise<void> {
  const context = this.contexts.get(platform);

  if (context) {
    // Snapshot the context's storage state and hand it to the CookieStore.
    const snapshot = await context.storageState();
    await cookieStore.save(platform, snapshot);
    logger.debug({ platform }, 'Cookies saved');
    return;
  }

  // No context has been created for this platform: nothing to persist.
  logger.warn(
    { platform },
    'saveCookies called but no context exists for platform',
  );
}
|
||||
|
||||
/**
|
||||
* Wait for every in-flight platform queue to settle. Useful during
|
||||
* graceful shutdown so that running operations finish before teardown.
|
||||
*/
|
||||
async drain(): Promise<void> {
  // Snapshot the current tail promise of every platform queue. Work that is
  // enqueued after this point is not part of the snapshot.
  const tails = Array.from(this.queues.values());
  if (tails.length === 0) {
    return;
  }

  logger.info({ queueCount: tails.length }, 'Draining browser operation queues');

  // allSettled: one failing queue must not cut the wait for the others short.
  await Promise.allSettled(tails);

  logger.info('All browser operation queues drained');
}
|
||||
|
||||
/**
|
||||
* Close all browser contexts and the browser itself, then reset internal
|
||||
* state. Safe to call multiple times.
|
||||
*/
|
||||
async close(): Promise<void> {
  // Close each platform context first, all in parallel. A context that fails
  // to close is logged and does not stop the remaining teardown.
  const openContexts = Array.from(this.contexts.values());
  const onContextCloseError = (err: unknown): void => {
    logger.warn({ err }, 'Error closing browser context during shutdown');
  };
  await Promise.all(
    openContexts.map((context) => context.close().catch(onContextCloseError)),
  );

  // Then shut the browser itself down, if one was ever launched.
  const browser = this.browser;
  if (browser) {
    await browser.close().catch((err: unknown) => {
      logger.warn({ err }, 'Error closing browser during shutdown');
    });
  }

  // Return to the pristine state so a repeated close() is a harmless no-op.
  this.browser = null;
  this.contexts.clear();
  this.queues.clear();
  this.queueDepths.clear();
  this.launchPromise = null;

  logger.info('BrowserManager closed');
}
|
||||
|
||||
// -- Private helpers ------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Ensure the browser is launched and connected. Uses a launch mutex so
|
||||
* that concurrent callers share a single launch attempt instead of
|
||||
* spawning multiple browser processes.
|
||||
*/
|
||||
private async ensureBrowser(): Promise<Browser> {
  // Fast path: a live browser already exists.
  const current = this.browser;
  if (current?.isConnected()) {
    return current;
  }

  // A launch is already in flight: share its promise instead of spawning a
  // second browser process.
  const inFlight = this.launchPromise;
  if (inFlight) {
    return inFlight;
  }

  // This caller owns the launch. Publish the promise before awaiting so that
  // concurrent callers can find it.
  const launching = this.launchBrowser();
  this.launchPromise = launching;

  try {
    return await launching;
  } finally {
    // Success or failure, the attempt is over; the next caller either finds
    // the browser or starts a new launch.
    this.launchPromise = null;
  }
}
|
||||
|
||||
/**
|
||||
* Launch a Chromium instance via rebrowser-playwright.
|
||||
*/
|
||||
private async launchBrowser(): Promise<Browser> {
  const { headless, browserBin } = config;

  logger.info(
    { headless: headless, browserBin: browserBin ?? 'default' },
    'Launching browser',
  );

  // Only pass executablePath when a custom binary is configured.
  const launchOptions = browserBin
    ? { headless, executablePath: browserBin }
    : { headless };
  const launched = await chromium.launch(launchOptions);

  // When the browser goes away (crash, OOM kill, ...), drop every reference
  // to it so the next ensureBrowser() call starts a fresh one.
  launched.on('disconnected', () => {
    logger.error('Browser disconnected unexpectedly');
    this.browser = null;
    this.contexts.clear();
    // launchPromise is left untouched here: ensureBrowser() resets it itself
    // as soon as the launch attempt it started has settled.
  });

  this.browser = launched;

  logger.info('Browser launched successfully');

  return launched;
}
|
||||
|
||||
/**
 * Get (or lazily create) a BrowserContext for the given platform.
 *
 * On first creation the saved storage state is loaded from the CookieStore
 * and passed to `newContext`, so sessions survive process restarts. A
 * failure to load cookies is logged and a fresh context is created instead.
 *
 * @param platform - Key under which the context is cached.
 * @returns The cached or newly created context.
 * @throws Error when no browser has been launched.
 */
private async getContext(platform: string): Promise<BrowserContext> {
  const existing = this.contexts.get(platform);
  if (existing) return existing;

  // Capture the browser before awaiting anything: the `disconnected`
  // handler may set `this.browser` to null while the cookie file is being
  // read, and dereferencing the field afterwards would fail with an opaque
  // "cannot read properties of null" instead of a Playwright error.
  const browser = this.browser;
  if (!browser) {
    throw new Error('Cannot create context: browser is not launched');
  }

  // Attempt to restore a previous session's storage state from disk.
  let storageState: Awaited<ReturnType<BrowserContext['storageState']>> | undefined;
  try {
    const loaded = await cookieStore.load(platform);
    if (loaded) {
      storageState = loaded;
      logger.debug(
        { platform, cookieCount: loaded.cookies.length },
        'Restoring saved cookies into new context',
      );
    }
  } catch (err: unknown) {
    // Cookie load failure should never prevent context creation.
    logger.warn(
      { err, platform },
      'Failed to load saved cookies -- creating fresh context',
    );
  }

  const ctx = await browser.newContext(
    storageState ? { storageState } : {},
  );

  this.contexts.set(platform, ctx);

  logger.debug({ platform }, 'Browser context created');

  return ctx;
}
|
||||
|
||||
/**
 * Build a promise that never resolves and rejects once `ms` milliseconds
 * have elapsed. Intended as the timeout arm of a `Promise.race`.
 *
 * The underlying timer is `unref()`'d so that a pending timeout does not,
 * by itself, keep the Node.js event loop alive. The timer is not cancelled
 * here when the other arm of the race settles first.
 *
 * @param ms - Delay before rejection, in milliseconds.
 * @param platform - Platform name included in the error message.
 */
private createTimeout<T>(ms: number, platform: string): Promise<T> {
  return new Promise<T>((_resolve, reject) => {
    const onExpire = (): void => {
      reject(
        new Error(
          `Operation timed out after ${ms}ms for platform "${platform}"`,
        ),
      );
    };

    const handle = setTimeout(onExpire, ms);

    // Only call unref() when the runtime's timer handle provides it.
    if (typeof handle === 'object' && 'unref' in handle) {
      handle.unref();
    }
  });
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Singleton export
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export const browserManager = new BrowserManager();
|
||||
@@ -0,0 +1,112 @@
|
||||
import os from 'node:os';
|
||||
import path from 'node:path';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Read a string environment variable.
 *
 * @param key - Environment variable name.
 * @param fallback - Value returned when the variable is not set.
 * @returns The variable's value (an empty string is returned as-is), or
 *          `fallback` when it is unset.
 */
function envString(key: string, fallback: string): string {
  const value = process.env[key];
  return value === undefined ? fallback : value;
}
|
||||
|
||||
/**
 * Read an integer environment variable.
 *
 * The whole value (after trimming surrounding whitespace) must be an
 * optionally signed run of decimal digits. Values such as `"3000abc"` or
 * `"12.5"`, which `parseInt` alone would silently truncate, are rejected
 * and reported on stderr.
 *
 * @param key - Environment variable name.
 * @param fallback - Value returned when the variable is unset or invalid.
 * @returns The parsed integer, or `fallback`.
 */
function envInt(key: string, fallback: number): number {
  const raw = process.env[key];
  if (raw === undefined) return fallback;

  const trimmed = raw.trim();
  const parsed = /^[+-]?\d+$/.test(trimmed) ? Number.parseInt(trimmed, 10) : Number.NaN;

  // isSafeInteger is false for NaN and for values too large to be exact.
  if (!Number.isSafeInteger(parsed)) {
    // eslint-disable-next-line no-console
    console.error(`[config] Invalid integer for ${key}="${raw}", using default ${fallback}`);
    return fallback;
  }

  return parsed;
}
|
||||
|
||||
/**
 * Read a boolean environment variable.
 *
 * Recognised values (case-insensitive): `true` / `1` / `yes` and
 * `false` / `0` / `no`.
 *
 * @param key - Environment variable name.
 * @param fallback - Value returned when the variable is unset or holds
 *                   anything other than the recognised values.
 */
function envBool(key: string, fallback: boolean): boolean {
  const raw = process.env[key];
  if (raw === undefined) return fallback;

  switch (raw.toLowerCase()) {
    case 'true':
    case '1':
    case 'yes':
      return true;
    case 'false':
    case '0':
    case 'no':
      return false;
    default:
      return fallback;
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// HOST safety check — must run before exporting config
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const host = envString('HOST', '127.0.0.1');

// Binding to a wildcard address is refused unless the operator has opted in
// explicitly through ALLOW_REMOTE.
const bindsWildcardAddress = host === '0.0.0.0' || host === '::';
const remoteAcknowledged = process.env['ALLOW_REMOTE'] === 'yes-i-understand-the-risk';

if (bindsWildcardAddress && !remoteAcknowledged) {
  // console.error is used directly: the logger module depends on config,
  // so it is not available yet at this point.
  // eslint-disable-next-line no-console
  console.error(
    `[FATAL] HOST is set to "${host}" which exposes the service to the network.\n` +
      `If you really intend to do this, set ALLOW_REMOTE=yes-i-understand-the-risk\n` +
      `Refusing to start.`,
  );
  process.exit(1);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Operation timeouts (milliseconds)
|
||||
// Matches the tiers described in PLAN.md section 6.1
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const operationTimeouts: Record<string, number> = {
|
||||
like: 15_000, // 15s — quick interactions
|
||||
favorite: 15_000, // 15s
|
||||
comment: 20_000, // 20s
|
||||
reply: 20_000, // 20s
|
||||
feed_list: 30_000, // 30s — page load + extraction
|
||||
search: 30_000, // 30s
|
||||
feed_detail: 60_000, // 60s — includes scroll loading
|
||||
user_profile: 60_000, // 60s
|
||||
publish: 300_000, // 5min — upload may be slow
|
||||
login: 300_000, // 5min — user interaction
|
||||
default: 60_000, // 1min — fallback
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Config type
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface AppConfig {
|
||||
/** HTTP port */
|
||||
port: number;
|
||||
/** HTTP bind address */
|
||||
host: string;
|
||||
/** Run browser in headless mode */
|
||||
headless: boolean;
|
||||
/** Custom browser executable path (optional) */
|
||||
browserBin: string | undefined;
|
||||
/** Pino log level */
|
||||
logLevel: string;
|
||||
/** NODE_ENV */
|
||||
nodeEnv: string;
|
||||
/** Directory for per-platform cookie storage */
|
||||
cookieDir: string;
|
||||
/** Max pending operations per platform queue */
|
||||
maxQueueDepth: number;
|
||||
/** Per-operation-type timeout in ms */
|
||||
operationTimeouts: Record<string, number>;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Exported config singleton
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export const config: AppConfig = {
|
||||
port: envInt('PORT', 3000),
|
||||
host,
|
||||
headless: envBool('HEADLESS', true),
|
||||
browserBin: process.env['BROWSER_BIN'] || undefined,
|
||||
logLevel: envString('LOG_LEVEL', 'info'),
|
||||
nodeEnv: envString('NODE_ENV', 'development'),
|
||||
cookieDir: envString('COOKIE_DIR', path.join(os.homedir(), '.social-mcp')),
|
||||
maxQueueDepth: envInt('MAX_QUEUE_DEPTH', 10),
|
||||
operationTimeouts,
|
||||
};
|
||||
@@ -0,0 +1,171 @@
|
||||
import fs from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
|
||||
import { config } from '../config/index.js';
|
||||
import { logger } from '../utils/logger.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types — mirrors Playwright's BrowserContext.storageState() shape
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface Cookie {
|
||||
name: string;
|
||||
value: string;
|
||||
domain: string;
|
||||
path: string;
|
||||
expires: number;
|
||||
httpOnly: boolean;
|
||||
secure: boolean;
|
||||
sameSite: 'Strict' | 'Lax' | 'None';
|
||||
}
|
||||
|
||||
export interface StorageState {
|
||||
cookies: Cookie[];
|
||||
origins: Array<{
|
||||
origin: string;
|
||||
localStorage: Array<{ name: string; value: string }>;
|
||||
}>;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// CookieStore
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const log = logger.child({ module: 'cookie-store' });
|
||||
|
||||
/**
 * Per-platform persistence of Playwright storage state (cookies and
 * localStorage) as JSON files under `config.cookieDir`.
 */
export class CookieStore {
  /**
   * Counter appended to temp-file names so that overlapping `save()` calls
   * in the same process never write to the same temporary file.
   */
  private static tmpSequence = 0;

  /**
   * Return the path to the cookies.json for a given platform, i.e.
   * `<cookieDir>/<platform>/cookies.json`.
   *
   * NOTE(review): `platform` is joined into the path unvalidated; this
   * assumes callers only pass trusted platform identifiers.
   */
  getPath(platform: string): string {
    return path.join(config.cookieDir, platform, 'cookies.json');
  }

  /**
   * Load the persisted storage state for a platform.
   *
   * @returns The stored state, or `null` when no cookie file exists or the
   *          file does not have the expected top-level structure.
   * @throws Any other read or JSON parse error, after logging it.
   */
  async load(platform: string): Promise<StorageState | null> {
    const filePath = this.getPath(platform);

    try {
      const raw = await fs.readFile(filePath, 'utf-8');
      const parsed: unknown = JSON.parse(raw);

      // Minimal structural validation so we don't blindly trust disk data.
      if (!isStorageState(parsed)) {
        log.warn({ platform, filePath }, 'Cookie file failed validation, treating as absent');
        return null;
      }

      log.debug({ platform, cookieCount: parsed.cookies.length }, 'Loaded cookies from disk');
      return parsed;
    } catch (err: unknown) {
      if (isNodeError(err) && err.code === 'ENOENT') {
        log.debug({ platform }, 'No cookie file found');
        return null;
      }
      log.error({ err, platform, filePath }, 'Failed to load cookie file');
      throw err;
    }
  }

  /**
   * Persist a storage state for a platform using an atomic write.
   *
   * Strategy:
   *  1. Ensure the platform directory exists (created with mode 0o700).
   *  2. Write to a uniquely named temporary file (`.tmp.<pid>.<seq>`) inside
   *     the same directory.
   *  3. Force the temp file's permissions to 0o600. The `mode` option of
   *     `writeFile` only applies when the file is created, so a leftover
   *     temp file with looser permissions would otherwise keep them.
   *  4. Rename the temp file onto the final path, so readers never observe
   *     a partially written cookies.json.
   *
   * On failure the temp file is removed on a best-effort basis and the
   * original error is rethrown.
   */
  async save(platform: string, state: StorageState): Promise<void> {
    const filePath = this.getPath(platform);
    const dir = path.dirname(filePath);
    const tmpName = `.tmp.${process.pid}.${CookieStore.tmpSequence++}`;
    const tmpPath = path.join(dir, tmpName);

    try {
      // Ensure directory exists with restricted permissions.
      await fs.mkdir(dir, { recursive: true, mode: 0o700 });

      const json = JSON.stringify(state, null, 2);

      // Write to temp file, set permissions, then atomically rename.
      await fs.writeFile(tmpPath, json, { encoding: 'utf-8', mode: 0o600 });
      await fs.chmod(tmpPath, 0o600);
      await fs.rename(tmpPath, filePath);

      log.debug(
        { platform, cookieCount: state.cookies.length },
        'Saved cookies to disk',
      );
    } catch (err: unknown) {
      log.error({ err, platform, filePath }, 'Failed to save cookie file');

      // Best-effort cleanup of the temp file.
      try {
        await fs.unlink(tmpPath);
      } catch {
        // Ignore — the temp file may not have been created.
      }

      throw err;
    }
  }

  /**
   * Delete the cookie file for a platform.
   * Succeeds silently when no file exists; other errors are logged and
   * rethrown.
   */
  async delete(platform: string): Promise<void> {
    const filePath = this.getPath(platform);

    try {
      await fs.unlink(filePath);
      log.debug({ platform }, 'Deleted cookie file');
    } catch (err: unknown) {
      if (isNodeError(err) && err.code === 'ENOENT') {
        log.debug({ platform }, 'Cookie file already absent, nothing to delete');
        return;
      }
      log.error({ err, platform, filePath }, 'Failed to delete cookie file');
      throw err;
    }
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Singleton
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export const cookieStore = new CookieStore();
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** An `Error` that may carry a string `code` such as `'ENOENT'`. */
interface NodeError extends Error {
  code?: string;
}

/**
 * Narrow an unknown thrown value to `NodeError`.
 *
 * Only checks that the value is an `Error` instance; `code` is optional, so
 * callers still have to compare it themselves.
 */
function isNodeError(err: unknown): err is NodeError {
  if (err instanceof Error) {
    return true;
  }
  return false;
}
|
||||
|
||||
/**
 * Runtime guard for data parsed from a cookie file.
 *
 * Deliberately shallow: the value must be a non-null object whose `cookies`
 * and `origins` properties are both arrays. Array elements and any extra
 * properties are not inspected, so forward-compatible fields are accepted.
 */
function isStorageState(value: unknown): value is StorageState {
  if (value === null || typeof value !== 'object') {
    return false;
  }

  const candidate = value as Record<string, unknown>;

  return Array.isArray(candidate['cookies']) && Array.isArray(candidate['origins']);
}
|
||||
@@ -0,0 +1,89 @@
|
||||
import { logger } from './utils/logger.js';
|
||||
import { browserManager } from './browser/manager.js';
|
||||
import { AppServer } from './server/app.js';
|
||||
import { xiaohongshuPlugin } from './platforms/xiaohongshu/index.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Bootstrap
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const appServer = new AppServer();

// -- Platform plugins -------------------------------------------------------
appServer.registerPlugin(xiaohongshuPlugin);

// -- Start ------------------------------------------------------------------

/** A server that cannot start is fatal: log the cause and exit non-zero. */
const onStartFailure = (err: unknown): void => {
  logger.fatal({ err }, 'Failed to start server');
  process.exit(1);
};

appServer.start().catch(onStartFailure);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Graceful shutdown
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Set once shutdown has begun so that repeated triggers are ignored. */
let shuttingDown = false;

/**
 * Shut the process down in order: drain browser queues, close the browser,
 * close the HTTP server, flush logs, exit.
 *
 * The exit code is 0 for ordinary signals and 1 when shutdown was triggered
 * by `uncaughtException` or `unhandledRejection`, so that supervisors
 * (restart policies, CI) do not mistake a crash for a clean stop. Any error
 * thrown during shutdown itself also exits with 1, and a 45s safety timer
 * forces exit with 1 if shutdown hangs.
 *
 * @param signal - Name of the signal or event that triggered the shutdown.
 */
async function gracefulShutdown(signal: string): Promise<void> {
  if (shuttingDown) return;
  shuttingDown = true;

  // Crash-triggered shutdowns must not report success.
  const triggeredByFatalError =
    signal === 'uncaughtException' || signal === 'unhandledRejection';
  const exitCode = triggeredByFatalError ? 1 : 0;

  logger.info({ signal }, 'Received shutdown signal — starting graceful shutdown');

  // Safety net: if graceful shutdown takes too long, force exit.
  const forceExitTimer = setTimeout(() => {
    logger.fatal('Graceful shutdown timed out after 45s — forcing exit');
    process.exit(1);
  }, 45_000);

  // Prevent the safety-net timer from keeping the process alive on its own.
  if (typeof forceExitTimer === 'object' && 'unref' in forceExitTimer) {
    forceExitTimer.unref();
  }

  try {
    // Step 1: Drain browser queues so in-flight operations finish (max 30s).
    logger.info('Shutdown step 1/5: draining browser queues');
    await Promise.race([
      browserManager.drain(),
      new Promise<void>((resolve) => setTimeout(resolve, 30_000).unref()),
    ]);

    // Step 2: Close the browser and all contexts.
    logger.info('Shutdown step 2/5: closing browser');
    await browserManager.close();

    // Step 3: Close the HTTP server (stop accepting new connections).
    logger.info('Shutdown step 3/5: closing HTTP server');
    await appServer.close();

    // Step 4: Flush structured logs so nothing is lost.
    logger.info('Shutdown step 4/5: flushing logger');
    logger.flush();

    // Step 5: Exit with a code that reflects why we shut down.
    logger.info('Shutdown step 5/5: exiting');
    process.exit(exitCode);
  } catch (err: unknown) {
    logger.fatal({ err }, 'Error during graceful shutdown');
    process.exit(1);
  }
}
|
||||
|
||||
// Termination signals both start a graceful shutdown.
for (const signal of ['SIGINT', 'SIGTERM'] as const) {
  process.on(signal, () => void gracefulShutdown(signal));
}

// ---------------------------------------------------------------------------
// Global error handlers
// ---------------------------------------------------------------------------

// Unhandled rejections and uncaught exceptions are logged at fatal level and
// then routed through the same shutdown path as the signals above.
process.on('unhandledRejection', (reason: unknown) => {
  logger.fatal({ err: reason }, 'Unhandled promise rejection');
  void gracefulShutdown('unhandledRejection');
});

process.on('uncaughtException', (err: Error) => {
  logger.fatal({ err }, 'Uncaught exception');
  void gracefulShutdown('uncaughtException');
});
|
||||
@@ -0,0 +1,322 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import { XHS_SELECTORS } from './selectors.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const FEED_DETAIL_URL = 'https://www.xiaohongshu.com/explore';
|
||||
|
||||
/** Wait after typing comment text before submitting. */
|
||||
const TYPE_SETTLE_MS = 500;
|
||||
|
||||
/** Wait after submit click to verify success. */
|
||||
const SUBMIT_SETTLE_MS = 2_000;
|
||||
|
||||
const selComment = XHS_SELECTORS.comment;
|
||||
const selDetail = XHS_SELECTORS.feedDetail;
|
||||
|
||||
const log = logger.child({ module: 'xhs-comment' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// postComment
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Post a top-level comment on a Xiaohongshu note.
 *
 * Flow: open the note page, locate the comment input, type the text,
 * submit, then check that the text shows up in the page.
 *
 * @param page      - Playwright Page managed by BrowserManager.
 * @param feedId    - The note / feed ID to comment on.
 * @param xsecToken - Security token for accessing the feed page.
 * @param content   - Comment text to post. Blank content is rejected.
 * @returns `{ success: true }` when the first 20 characters of `content`
 *          are found in the page text after submitting; `{ success: false }`
 *          when the content is blank, the input or submit step fails, or the
 *          text is not found.
 */
export async function postComment(
  page: Page,
  feedId: string,
  xsecToken: string,
  content: string,
): Promise<{ success: boolean }> {
  log.info({ feedId }, 'Posting comment on note');

  // A blank comment cannot be posted, and an empty search string would make
  // the verification in step 5 succeed unconditionally.
  if (content.trim().length === 0) {
    log.warn({ feedId }, 'Refusing to post an empty comment');
    return { success: false };
  }

  // -------------------------------------------------------------------------
  // 1. Navigate to the feed detail page
  // -------------------------------------------------------------------------

  const feedUrl = buildFeedUrl(feedId, xsecToken);
  await page.goto(feedUrl, { waitUntil: 'domcontentloaded' });

  // Wait for the note container, then give the page a moment to settle.
  await page.waitForSelector(selDetail.noteContainer, { timeout: 10_000 });
  await page.waitForTimeout(1_000);

  // -------------------------------------------------------------------------
  // 2. Find and focus the comment input
  // -------------------------------------------------------------------------

  const commentInput = await findCommentInput(page);

  if (!commentInput) {
    log.warn('Comment input not found on feed detail page');
    return { success: false };
  }

  // -------------------------------------------------------------------------
  // 3. Type the comment content
  // -------------------------------------------------------------------------

  await commentInput.click();
  await page.waitForTimeout(300);
  await page.keyboard.type(content, { delay: 30 });
  await page.waitForTimeout(TYPE_SETTLE_MS);

  // -------------------------------------------------------------------------
  // 4. Submit the comment
  // -------------------------------------------------------------------------

  const submitted = await submitComment(page);

  if (!submitted) {
    log.warn('Failed to submit comment — submit button not found or click failed');
    return { success: false };
  }

  // -------------------------------------------------------------------------
  // 5. Verify the comment was posted
  // -------------------------------------------------------------------------

  await page.waitForTimeout(SUBMIT_SETTLE_MS);

  // Compare against the page's text rather than its serialized HTML: in
  // HTML, characters such as `&` and `<` are entity-escaped, so comments
  // containing them could never be matched.
  // NOTE(review): text still sitting in a contenteditable input would also
  // match; this remains a heuristic.
  const bodyText = (await page.textContent('body')) ?? '';
  const success = bodyText.includes(content.slice(0, 20));

  log.info({ feedId, success }, 'Comment post complete');

  return { success };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// replyComment
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Reply to an existing comment on a Xiaohongshu note.
 *
 * Target selection:
 *  - `commentId` given: only that comment's reply button is used. If it
 *    cannot be found, the reply is NOT attached to some other comment;
 *    it falls through to the main comment input instead.
 *  - `commentId` omitted: the first comment on the page is used.
 *  - No reply box activated: the text goes into the main comment input,
 *    prefixed with `@<userId>` when `userId` is given.
 *
 * @param page      - Playwright Page managed by BrowserManager.
 * @param feedId    - The note / feed ID.
 * @param xsecToken - Security token for accessing the feed page.
 * @param content   - Reply text to post. Blank content is rejected.
 * @param commentId - Optional ID of the comment to reply to.
 * @param userId    - Optional user ID of the comment author (for @ mention).
 * @returns `{ success: true }` when the first 20 characters of `content`
 *          are found in the page text after submitting, otherwise
 *          `{ success: false }`.
 */
export async function replyComment(
  page: Page,
  feedId: string,
  xsecToken: string,
  content: string,
  commentId?: string,
  userId?: string,
): Promise<{ success: boolean }> {
  log.info({ feedId, commentId, userId }, 'Replying to comment on note');

  // A blank reply cannot be posted, and an empty search string would make
  // the verification in step 5 succeed unconditionally.
  if (content.trim().length === 0) {
    log.warn({ feedId, commentId }, 'Refusing to post an empty reply');
    return { success: false };
  }

  // -------------------------------------------------------------------------
  // 1. Navigate to the feed detail page
  // -------------------------------------------------------------------------

  const feedUrl = buildFeedUrl(feedId, xsecToken);
  await page.goto(feedUrl, { waitUntil: 'domcontentloaded' });

  await page.waitForSelector(selDetail.noteContainer, { timeout: 10_000 });
  await page.waitForTimeout(1_000);

  // -------------------------------------------------------------------------
  // 2. Find the target comment and click its reply button
  // -------------------------------------------------------------------------

  let replyInputFound = false;

  if (commentId) {
    // A specific comment was requested: never substitute another one, or
    // the reply would land in an unrelated thread.
    replyInputFound = await clickReplyOnComment(page, commentId);
    if (!replyInputFound) {
      log.warn({ feedId, commentId }, 'Target comment not found; not replying to a different comment');
    }
  } else {
    // No target given: use the first comment's reply button.
    const [firstComment] = await page.$$(selComment.commentItem);

    if (firstComment) {
      // Hover to reveal the reply button (some UIs show it on hover).
      await firstComment.hover();
      await page.waitForTimeout(300);

      const replyBtn = await firstComment.$(selComment.commentReplyButton);
      if (replyBtn) {
        await replyBtn.click();
        await page.waitForTimeout(500);
        replyInputFound = true;
      }
    }
  }

  // If no reply input was activated, fall back to the main comment input
  // and prefix with @userId if available (see step 3).
  if (!replyInputFound) {
    log.debug('Reply button not found, falling back to main comment input');
  }

  // -------------------------------------------------------------------------
  // 3. Type the reply content
  // -------------------------------------------------------------------------

  // Try the reply-specific input first, then the main comment input.
  const replyInput =
    (await page.$(selComment.replyInput)) ??
    (await findCommentInput(page));

  if (!replyInput) {
    log.warn('Reply input not found');
    return { success: false };
  }

  await replyInput.click();
  await page.waitForTimeout(300);

  // When no reply box was opened, mention the user so the comment is still
  // addressed to them.
  const textToType =
    !replyInputFound && userId ? `@${userId} ${content}` : content;

  await page.keyboard.type(textToType, { delay: 30 });
  await page.waitForTimeout(TYPE_SETTLE_MS);

  // -------------------------------------------------------------------------
  // 4. Submit the reply
  // -------------------------------------------------------------------------

  const submitted = await submitComment(page);

  if (!submitted) {
    log.warn('Failed to submit reply');
    return { success: false };
  }

  // -------------------------------------------------------------------------
  // 5. Verify
  // -------------------------------------------------------------------------

  await page.waitForTimeout(SUBMIT_SETTLE_MS);

  // Compare against the page's text rather than its serialized HTML: in
  // HTML, characters such as `&` and `<` are entity-escaped, so replies
  // containing them could never be matched.
  // NOTE(review): text still sitting in a contenteditable input would also
  // match; this remains a heuristic.
  const bodyText = (await page.textContent('body')) ?? '';
  const success = bodyText.includes(content.slice(0, 20));

  log.info({ feedId, commentId, success }, 'Reply post complete');

  return { success };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Build the URL for a feed detail page.
 *
 * Both caller-supplied values are percent-encoded: `feedId` becomes a path
 * segment and `xsecToken` a query value, so characters such as `/`, `?` or
 * `#` in either one cannot alter the structure of the URL.
 */
function buildFeedUrl(feedId: string, xsecToken: string): string {
  const noteSegment = encodeURIComponent(feedId);
  const token = encodeURIComponent(xsecToken);
  return `${FEED_DETAIL_URL}/${noteSegment}?xsec_token=${token}&xsec_source=pc_search`;
}
|
||||
|
||||
/**
 * Locate the main comment input.
 *
 * Looks for the primary input selector, then the alternative one. If
 * neither matches, clicks the comment area (some UIs only render the input
 * after that), waits 500ms, and looks once more.
 *
 * @returns The input's element handle, or `null` when none is found.
 */
async function findCommentInput(page: Page) {
  // Primary selector first, alternative selector second.
  const queryInput = async () =>
    (await page.$(selComment.commentInput)) ??
    (await page.$(selComment.commentInputAlt));

  const immediate = await queryInput();
  if (immediate) return immediate;

  // Last resort: activate the comment zone, then query again.
  const commentArea = await page.$('.comment-area, .comments-container');
  if (!commentArea) return null;

  await commentArea.click();
  await page.waitForTimeout(500);

  return queryInput();
}
|
||||
|
||||
/**
 * Find a specific comment by its ID and click its reply button.
 *
 * The comment is looked up by `id="comment-<commentId>"` first, then by
 * `data-comment-id="<commentId>"`. The ID is escaped before being placed
 * inside the quoted attribute value, so a quote, backslash or line break in
 * `commentId` cannot break the selector or change what it matches.
 *
 * @returns `true` when the reply button was clicked; `false` when the
 *          comment or its reply button was not found.
 */
async function clickReplyOnComment(
  page: Page,
  commentId: string,
): Promise<boolean> {
  // Escape for use inside a double-quoted CSS string: backslash-escape `"`
  // and `\`, and write line breaks as hex escapes.
  const safeId = commentId
    .replace(/["\\]/g, '\\$&')
    .replace(/[\n\r\f]/g, (ch) => `\\${ch.charCodeAt(0).toString(16)} `);

  // Try to find comment by ID attribute, then by data attribute.
  const commentEl =
    (await page.$(`[id="comment-${safeId}"]`)) ??
    (await page.$(`[data-comment-id="${safeId}"]`));

  if (!commentEl) {
    log.debug({ commentId }, 'Target comment element not found by ID');
    return false;
  }

  // Hover to reveal the reply button.
  await commentEl.hover();
  await page.waitForTimeout(300);

  const replyBtn = await commentEl.$(selComment.commentReplyButton);
  if (!replyBtn) {
    log.debug({ commentId }, 'Reply button not found on target comment');
    return false;
  }

  await replyBtn.click();
  await page.waitForTimeout(500);

  return true;
}
|
||||
|
||||
/**
 * Submit the comment currently typed into the focused input.
 *
 * - No submit button on the page: presses Control+Enter and reports `true`
 *   (the shortcut's effect is not checked here).
 * - Button present but disabled: waits one second and looks again. If the
 *   button is gone or still disabled, returns `false` instead of clicking,
 *   because clicking a disabled element makes Playwright wait for it to
 *   become enabled until the action times out.
 * - Otherwise clicks the button and returns `true`.
 */
async function submitComment(page: Page): Promise<boolean> {
  let submitBtn = await page.$(selComment.commentSubmitButton);

  if (!submitBtn) {
    // Some UIs submit on Ctrl+Enter / Cmd+Enter.
    log.debug('Submit button not found, trying keyboard shortcut');
    await page.keyboard.press('Control+Enter');
    return true;
  }

  // A failing isDisabled() probe is treated as "not disabled".
  if (await submitBtn.isDisabled().catch(() => false)) {
    log.debug('Submit button is disabled, waiting briefly');
    await page.waitForTimeout(1_000);

    // Re-query in case the button was re-rendered or became enabled.
    submitBtn = await page.$(selComment.commentSubmitButton);
    if (!submitBtn) return false;

    if (await submitBtn.isDisabled().catch(() => false)) {
      log.warn('Submit button still disabled after waiting; not clicking');
      return false;
    }
  }

  await submitBtn.click();
  return true;
}
|
||||
@@ -0,0 +1,724 @@
|
||||
import type { Page, ElementHandle } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import { XHS_SELECTORS } from './selectors.js';
|
||||
import { extractInitialState, parseCountString, ensureHttps } from './feeds.js';
|
||||
import type { FeedDetail, Comment } from './types.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const FEED_DETAIL_BASE_URL = 'https://www.xiaohongshu.com/explore';
|
||||
|
||||
/** Maximum number of "show more" clicks to load comments. */
|
||||
const MAX_LOAD_MORE_CLICKS = 20;
|
||||
|
||||
/** Delay between "show more" clicks to let the page render. */
|
||||
const LOAD_MORE_DELAY_MS = 1500;
|
||||
|
||||
const SEL = XHS_SELECTORS.feedDetail;
|
||||
|
||||
const log = logger.child({ module: 'xhs-feed-detail' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// __INITIAL_STATE__ raw types for feed detail
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
interface RawDetailState {
|
||||
noteData?: {
|
||||
data?: {
|
||||
noteData?: RawNoteData;
|
||||
};
|
||||
noteData?: RawNoteData;
|
||||
};
|
||||
note?: {
|
||||
noteDetailMap?: Record<string, { note?: RawNoteData }>;
|
||||
note?: RawNoteData;
|
||||
noteData?: RawNoteData;
|
||||
};
|
||||
[key: string]: unknown;
|
||||
}
|
||||
|
||||
interface RawNoteData {
|
||||
noteId?: string;
|
||||
id?: string;
|
||||
title?: string;
|
||||
desc?: string;
|
||||
description?: string;
|
||||
type?: string;
|
||||
imageList?: RawNoteImage[];
|
||||
image_list?: RawNoteImage[];
|
||||
video?: RawNoteVideo;
|
||||
tagList?: RawNoteTag[];
|
||||
tag_list?: RawNoteTag[];
|
||||
interactInfo?: RawNoteInteract;
|
||||
interact_info?: RawNoteInteract;
|
||||
time?: number;
|
||||
createTime?: number;
|
||||
create_time?: number;
|
||||
lastUpdateTime?: number;
|
||||
last_update_time?: number;
|
||||
ipLocation?: string;
|
||||
ip_location?: string;
|
||||
user?: RawNoteUser;
|
||||
xsecToken?: string;
|
||||
xsec_token?: string;
|
||||
comments?: RawCommentData[];
|
||||
}
|
||||
|
||||
/** Raw image entry; the URL may be carried by any one of these fields. */
interface RawNoteImage {
  url?: string;
  urlPre?: string;
  url_pre?: string;
  urlDefault?: string;
  url_default?: string;
  infoList?: { url?: string }[];
  info_list?: { url?: string }[];
}
|
||||
|
||||
/** Raw video descriptor with the nested locations a playable URL may occupy. */
interface RawNoteVideo {
  url?: string;
  consumer?: {
    originVideoKey?: string;
    origin_video_key?: string;
  };
  media?: {
    stream?: {
      h264?: { masterUrl?: string; master_url?: string }[];
    };
    video?: { url?: string };
  };
}
|
||||
|
||||
/** Raw tag entry attached to a note. */
interface RawNoteTag {
  id?: string;
  name?: string;
  type?: string;
}
|
||||
|
||||
/** Raw interaction counters, declared as strings in both spellings. */
interface RawNoteInteract {
  likedCount?: string;
  liked_count?: string;

  collectedCount?: string;
  collected_count?: string;

  commentCount?: string;
  comment_count?: string;

  shareCount?: string;
  share_count?: string;
}
|
||||
|
||||
/** Raw author record; id, nickname and avatar each have several spellings. */
interface RawNoteUser {
  userId?: string;
  user_id?: string;

  nickname?: string;
  nick_name?: string;
  nickName?: string;

  avatar?: string;
  avatarUrl?: string;
  avatar_url?: string;
}
|
||||
|
||||
/**
 * Raw comment record. Replies use the same shape and are nested under
 * `subComments` / `sub_comments`.
 */
interface RawCommentData {
  id?: string;

  // Author: either flat on the comment or nested under `userInfo`.
  userId?: string;
  user_id?: string;
  userInfo?: {
    userId?: string;
    user_id?: string;
    nickname?: string;
    nick_name?: string;
    image?: string;
    avatar?: string;
  };

  content?: string;

  // Like counter: declared as either a number or a string.
  likeCount?: string | number;
  like_count?: string | number;

  createTime?: number;
  create_time?: number;

  ipLocation?: string;
  ip_location?: string;

  // Replies
  subComments?: RawCommentData[];
  sub_comments?: RawCommentData[];
  subCommentCount?: number | string;
  sub_comment_count?: number | string;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// getFeedDetail
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Open a Xiaohongshu note detail page and collect its title, text, media,
 * counters, author and comments.
 *
 * The note is read from `__INITIAL_STATE__` when that yields a result and
 * scraped from the DOM otherwise. Comments are scraped from the DOM when the
 * state carried none or when `loadAllComments` is set; scraped comments
 * replace the state's list only if at least one was found.
 *
 * @param page - A Playwright Page managed by BrowserManager.
 * @param feedId - The note (feed) ID.
 * @param xsecToken - Security token required to access the note.
 * @param loadAllComments - If true, repeatedly clicks "show more" to load as
 *   many comments as possible.
 * @returns A FeedDetail object with the note data and comments.
 */
export async function getFeedDetail(
  page: Page,
  feedId: string,
  xsecToken: string,
  loadAllComments = false,
): Promise<FeedDetail> {
  // Both values come from the caller, so both are encoded: an id containing
  // `/`, `?` or `#` must not be able to change the path or query.
  const pageUrl = `${FEED_DETAIL_BASE_URL}/${encodeURIComponent(feedId)}`;
  const url = `${pageUrl}?xsec_token=${encodeURIComponent(xsecToken)}&xsec_source=pc_feed`;

  // Log the URL without its query string so the access token stays out of logs.
  log.debug({ feedId, url: pageUrl }, 'Navigating to feed detail page');

  await page.goto(url, { waitUntil: 'domcontentloaded' });

  // Wait for any of the note content elements; a timeout is not fatal.
  await page
    .waitForSelector(`${SEL.noteContainer}, ${SEL.title}, ${SEL.description}`, {
      timeout: 15_000,
    })
    .catch(() => {
      log.warn({ feedId }, 'Note container not found within timeout, proceeding with extraction');
    });

  // Allow render to settle.
  await page.waitForTimeout(1500);

  // Strategy 1: extract from __INITIAL_STATE__.
  const initialState = (await extractInitialState(page)) as RawDetailState | null;
  let detail = initialState ? parseDetailFromState(initialState, feedId, xsecToken) : null;

  if (detail) {
    log.debug({ feedId }, 'Extracted feed detail from __INITIAL_STATE__');
  } else {
    // Strategy 2: fall back to DOM scraping.
    log.debug({ feedId }, 'Falling back to DOM scraping for feed detail');
    detail = await scrapeDetailFromDom(page, feedId, xsecToken);
  }

  // Load comments from the DOM (__INITIAL_STATE__ may not include them).
  if (detail.comments.length === 0 || loadAllComments) {
    const comments = await scrapeComments(page, loadAllComments);
    if (comments.length > 0) {
      detail.comments = comments;
    }
  }

  log.info(
    { feedId, commentCount: detail.comments.length, imageCount: detail.images.length },
    'Feed detail extraction complete',
  );

  return detail;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// __INITIAL_STATE__ parsing
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Build a FeedDetail from `__INITIAL_STATE__`.
 *
 * The note payload is looked up, in order, at `noteData.data.noteData`,
 * `noteData.noteData`, `note.noteDetailMap[feedId].note`, `note.note` and
 * `note.noteData`.
 *
 * @param state - The raw initial state object.
 * @param feedId - Note id; used for the map lookup and as the id fallback.
 * @param xsecToken - Token used when the payload does not carry its own.
 * @returns The parsed detail, or null when no note payload is found.
 */
function parseDetailFromState(
  state: RawDetailState,
  feedId: string,
  xsecToken: string,
): FeedDetail | null {
  /**
   * Convert a raw timestamp to an ISO-8601 string, or '' when it is missing
   * or unparseable. `Date.prototype.toISOString` throws RangeError on an
   * invalid date, so validity is checked first. Numbers below 1e12 are
   * treated as seconds. The payload is untyped at runtime, so digit-only
   * strings are handled as epochs too.
   */
  const toIsoTimestamp = (raw: unknown): string => {
    if (!raw) return '';
    let value: unknown = raw;
    if (typeof value === 'string' && /^\d+$/.test(value.trim())) {
      value = Number(value.trim());
    }
    if (typeof value === 'number') {
      value = value < 1e12 ? value * 1000 : value;
    }
    if (typeof value !== 'number' && typeof value !== 'string') return '';
    const date = new Date(value);
    return Number.isNaN(date.getTime()) ? '' : date.toISOString();
  };

  // Probe each known location in turn; the first truthy payload wins.
  const noteData: RawNoteData | undefined =
    state.noteData?.data?.noteData ||
    state.noteData?.noteData ||
    state.note?.noteDetailMap?.[feedId]?.note ||
    state.note?.note ||
    state.note?.noteData;

  if (!noteData) {
    return null;
  }

  const type: 'normal' | 'video' = (noteData.type ?? '').toLowerCase().includes('video')
    ? 'video'
    : 'normal';

  // Images: take the first non-empty URL field of each entry; drop entries with none.
  const images = (noteData.imageList ?? noteData.image_list ?? [])
    .map((img) => {
      const candidate =
        img.url ||
        img.urlPre ||
        img.urlDefault ||
        img.url_pre ||
        img.url_default ||
        (img.infoList ?? img.info_list)?.[0]?.url;
      return candidate ? ensureHttps(candidate) : '';
    })
    .filter((url) => url !== '');

  // Video URL: direct url first, then the first h264 stream, then media.video.
  let videoUrl: string | undefined;
  const video = noteData.video;
  if (video) {
    const firstStream = video.media?.stream?.h264?.[0];
    videoUrl =
      video.url ?? firstStream?.masterUrl ?? firstStream?.master_url ?? video.media?.video?.url;
    if (videoUrl) videoUrl = ensureHttps(videoUrl);
  }

  // Tags: names only, unnamed tags dropped.
  const tags = (noteData.tagList ?? noteData.tag_list ?? [])
    .map((tag) => tag.name ?? '')
    .filter((name) => name !== '');

  // Interaction counters; a missing value counts as '0'.
  const interact = noteData.interactInfo ?? noteData.interact_info;
  const likeCount = parseCountString(interact?.likedCount ?? interact?.liked_count ?? '0');
  const collectCount = parseCountString(
    interact?.collectedCount ?? interact?.collected_count ?? '0',
  );
  const commentCount = parseCountString(interact?.commentCount ?? interact?.comment_count ?? '0');
  const shareCount = parseCountString(interact?.shareCount ?? interact?.share_count ?? '0');

  const rawUser = noteData.user;

  // Comments embedded in the state (may be empty); entries without an id are dropped.
  const comments = (noteData.comments ?? [])
    .map(parseRawComment)
    .filter((c): c is Comment => c !== null);

  return {
    id: noteData.noteId ?? noteData.id ?? feedId,
    xsecToken: noteData.xsecToken ?? noteData.xsec_token ?? xsecToken,
    title: noteData.title ?? '',
    description: noteData.desc ?? noteData.description ?? '',
    type,
    images,
    videoUrl,
    tags,
    likeCount,
    collectCount,
    commentCount,
    shareCount,
    createTime: toIsoTimestamp(noteData.time ?? noteData.createTime ?? noteData.create_time),
    lastUpdateTime: toIsoTimestamp(noteData.lastUpdateTime ?? noteData.last_update_time),
    ipLocation: noteData.ipLocation ?? noteData.ip_location ?? '',
    user: {
      id: rawUser?.userId ?? rawUser?.user_id ?? '',
      nickname: rawUser?.nickname ?? rawUser?.nick_name ?? rawUser?.nickName ?? '',
      avatar: rawUser?.avatar ?? rawUser?.avatarUrl ?? rawUser?.avatar_url ?? '',
    },
    comments,
  };
}
|
||||
|
||||
/**
 * Convert a raw comment from `__INITIAL_STATE__` into a Comment.
 *
 * Replies under `subComments` / `sub_comments` are converted recursively, and
 * replies without an id are dropped.
 *
 * @param raw - The raw comment record.
 * @returns The parsed comment, or null when the record has no id.
 */
function parseRawComment(raw: RawCommentData): Comment | null {
  const id = raw.id ?? '';
  if (!id) return null;

  /**
   * Convert a raw timestamp to an ISO-8601 string, or '' when it is missing
   * or unparseable. `Date.prototype.toISOString` throws RangeError on an
   * invalid date, so validity is checked first. Numbers below 1e12 are
   * treated as seconds. The payload is untyped at runtime, so digit-only
   * strings are handled as epochs too.
   */
  const toIsoTimestamp = (value: unknown): string => {
    if (!value) return '';
    let candidate: unknown = value;
    if (typeof candidate === 'string' && /^\d+$/.test(candidate.trim())) {
      candidate = Number(candidate.trim());
    }
    if (typeof candidate === 'number') {
      candidate = candidate < 1e12 ? candidate * 1000 : candidate;
    }
    if (typeof candidate !== 'number' && typeof candidate !== 'string') return '';
    const date = new Date(candidate);
    return Number.isNaN(date.getTime()) ? '' : date.toISOString();
  };

  const userInfo = raw.userInfo;

  // The like counter may be numeric or a display string; only strings need parsing.
  const likeCountRaw = raw.likeCount ?? raw.like_count ?? 0;
  const likeCount =
    typeof likeCountRaw === 'string' ? parseCountString(likeCountRaw) : likeCountRaw;

  const subComments = (raw.subComments ?? raw.sub_comments ?? [])
    .map(parseRawComment)
    .filter((c): c is Comment => c !== null);

  return {
    id,
    userId: raw.userId ?? raw.user_id ?? userInfo?.userId ?? userInfo?.user_id ?? '',
    nickname: userInfo?.nickname ?? userInfo?.nick_name ?? '',
    avatar: userInfo?.image ?? userInfo?.avatar ?? '',
    content: raw.content ?? '',
    likeCount,
    createTime: toIsoTimestamp(raw.createTime ?? raw.create_time),
    ipLocation: raw.ipLocation ?? raw.ip_location ?? '',
    subComments,
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// DOM scraping fallback — uses Playwright Node-side API exclusively
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Build a FeedDetail by reading the rendered page through Playwright's
 * `$eval` / `$$eval`.
 *
 * Every lookup is best-effort: a selector that matches nothing produces an
 * empty string, empty list or zero count. `createTime` is the text shown on
 * the page, `lastUpdateTime` is always '' and `comments` is always empty
 * here (the caller fills it in separately).
 *
 * @param page - The page already showing the note.
 * @param feedId - Returned unchanged as the detail's id.
 * @param xsecToken - Returned unchanged as the detail's token.
 */
async function scrapeDetailFromDom(
  page: Page,
  feedId: string,
  xsecToken: string,
): Promise<FeedDetail> {
  // Trimmed text of the first match, or '' when nothing matches.
  const textOf = (selector: string): Promise<string> =>
    page.$eval(selector, (el) => el.textContent?.trim() ?? '').catch(() => '');

  // `src` attribute of the first match, or '' when nothing matches.
  const srcOf = (selector: string): Promise<string> =>
    page.$eval(selector, (el) => el.getAttribute('src') ?? '').catch(() => '');

  const title = await textOf(SEL.title);
  const description = await textOf(SEL.description);

  // Images: the image list first, the hero image only if the list is empty.
  let images: string[] = await page
    .$$eval(SEL.images, (imgs) =>
      imgs.map((img) => img.getAttribute('src') ?? '').filter(Boolean),
    )
    .catch(() => [] as string[]);
  if (images.length === 0) {
    const heroSrc = await srcOf(SEL.heroImage);
    if (heroSrc) images = [heroSrc];
  }

  // Video: the <video> element's src first, a nested <source> otherwise.
  let videoUrl: string | undefined;
  const videoSrc = await srcOf(SEL.video);
  if (videoSrc) {
    videoUrl = videoSrc;
  } else {
    const sourceSrc = await srcOf(SEL.videoSource);
    if (sourceSrc) videoUrl = sourceSrc;
  }

  // Tags, with a leading '#' stripped.
  const tags: string[] = await page
    .$$eval(SEL.tags, (els) =>
      els
        .map((el) => el.textContent?.trim().replace(/^#/, '') ?? '')
        .filter(Boolean),
    )
    .catch(() => [] as string[]);

  const likeCount = await extractCount(page, SEL.likeCount);
  const collectCount = await extractCount(page, SEL.collectCount);
  const commentCount = await extractCount(page, SEL.commentCount);
  const shareCount = await extractCount(page, SEL.shareCount);

  const createTime = await textOf(SEL.createTime);
  const ipLocation = await textOf(SEL.ipLocation);

  const authorName = await textOf(SEL.authorName);
  const authorAvatar = await srcOf(SEL.authorAvatar);

  // The author id is the hex segment of the profile link.
  const authorLinkHref = await page
    .$eval(SEL.authorLink, (el) => el.getAttribute('href') ?? '')
    .catch(() => '');
  const authorId = authorLinkHref.match(/\/user\/profile\/([a-f0-9]+)/)?.[1] ?? '';

  return {
    id: feedId,
    xsecToken,
    title,
    description,
    type: videoUrl ? 'video' : 'normal',
    images,
    videoUrl,
    tags,
    likeCount,
    collectCount,
    commentCount,
    shareCount,
    createTime,
    lastUpdateTime: '',
    ipLocation,
    user: {
      id: authorId,
      nickname: authorName,
      avatar: authorAvatar,
    },
    comments: [],
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Comment scraping from DOM — uses Playwright Node-side API exclusively
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Scrape comments from the note detail page DOM.
 *
 * The comments area is scrolled into view first. With `loadAllComments`, the
 * "show more" button is then clicked until it is missing or hidden, or until
 * MAX_LOAD_MORE_CLICKS attempts have been made. Comment elements that fail to
 * parse are skipped and logged at debug level.
 *
 * @param page - The current Playwright page (already on the detail URL).
 * @param loadAllComments - If true, clicks "show more" buttons repeatedly.
 * @returns An array of Comment objects.
 */
async function scrapeComments(
  page: Page,
  loadAllComments: boolean,
): Promise<Comment[]> {
  // Scroll to the comments section to trigger lazy loading. The script is
  // passed as a string so no DOM types are needed on the Node side.
  await page.evaluate(`
    (() => {
      const commentsArea = document.querySelector('.comments-container');
      if (commentsArea) {
        commentsArea.scrollIntoView({ behavior: 'smooth' });
      } else {
        window.scrollTo(0, document.body.scrollHeight);
      }
    })()
  `);
  await page.waitForTimeout(1500);

  if (loadAllComments) {
    let clicks = 0;
    let failedClicks = 0;

    // Attempts are bounded, so a button that can never be clicked cannot
    // keep the loop alive; only successful clicks are counted as clicks.
    for (let attempt = 0; attempt < MAX_LOAD_MORE_CLICKS; attempt++) {
      const showMoreBtn = await page.$(SEL.showMoreComments);
      if (!showMoreBtn) break;

      const isVisible = await showMoreBtn.isVisible().catch(() => false);
      if (!isVisible) break;

      const clicked = await showMoreBtn.click().then(
        () => true,
        (err: unknown) => {
          log.debug({ err, attempt }, 'Failed to click "show more comments" button');
          return false;
        },
      );
      if (clicked) {
        clicks++;
      } else {
        failedClicks++;
      }

      await page.waitForTimeout(LOAD_MORE_DELAY_MS);
    }

    if (clicks > 0 || failedClicks > 0) {
      log.debug({ clicks, failedClicks }, 'Clicked "show more comments" button');
    }
  }

  // Extract every comment element currently in the DOM.
  const commentElements = await page.$$(SEL.commentItem);
  const comments: Comment[] = [];
  let skipped = 0;

  for (const commentEl of commentElements) {
    try {
      const comment = await parseCommentElement(commentEl);
      if (comment) {
        comments.push(comment);
      }
    } catch (err: unknown) {
      // Best-effort: one unreadable comment must not abort the rest.
      skipped++;
      log.debug({ err }, 'Skipping comment element that failed to parse');
    }
  }

  if (skipped > 0) {
    log.debug({ skipped, parsed: comments.length }, 'Some comment elements could not be parsed');
  }

  return comments;
}
|
||||
|
||||
/**
 * Read the fields of one comment (or reply) element. Missing child elements
 * yield empty strings, with '0' as the like-count text. `subComments` is
 * left empty for the caller to fill.
 *
 * The element's own id lookup is not guarded, so a failure there rejects.
 */
async function readCommentFields(el: ElementHandle): Promise<Comment> {
  // Trimmed text of the first matching descendant, or `fallback`.
  const content = await el
    .$eval(SEL.commentContent, (node) => node.textContent?.trim() ?? '')
    .catch(() => '');
  const nickname = await el
    .$eval(SEL.commentAuthor, (node) => node.textContent?.trim() ?? '')
    .catch(() => '');
  const avatar = await el
    .$eval(SEL.commentAvatar, (node) => node.getAttribute('src') ?? '')
    .catch(() => '');
  const likeText = await el
    .$eval(SEL.commentLikeCount, (node) => node.textContent?.trim() ?? '0')
    .catch(() => '0');
  const createTime = await el
    .$eval(SEL.commentTime, (node) => node.textContent?.trim() ?? '')
    .catch(() => '');
  const ipLocation = await el
    .$eval(SEL.commentIpLocation, (node) => node.textContent?.trim() ?? '')
    .catch(() => '');

  // The comment id is taken from the first of these attributes that is set.
  const id = await el.evaluate(
    (node) =>
      node.getAttribute('id') ??
      node.getAttribute('data-id') ??
      node.getAttribute('data-comment-id') ??
      '',
  );

  // The user id is the hex segment of the first profile link inside the element.
  const profileHref = await el
    .$eval('a[href*="/user/profile/"]', (node) => node.getAttribute('href') ?? '')
    .catch(() => '');
  const userId = profileHref.match(/\/user\/profile\/([a-f0-9]+)/)?.[1] ?? '';

  return {
    id,
    userId,
    nickname,
    avatar,
    content,
    likeCount: parseCountString(likeText),
    createTime,
    ipLocation,
    subComments: [],
  };
}

/**
 * Turn one comment element into a Comment, including its direct replies.
 *
 * Replies are read with the same field extraction as the comment itself and
 * carry no nested replies of their own; a reply that fails to read is
 * skipped. The signature allows null, but this implementation always
 * resolves to a Comment.
 *
 * @param commentEl - Handle of the comment's root element.
 */
async function parseCommentElement(
  commentEl: ElementHandle,
): Promise<Comment | null> {
  const topLevel = await readCommentFields(commentEl);

  const replies: Comment[] = [];
  for (const replyEl of await commentEl.$$(SEL.subCommentItem)) {
    try {
      replies.push(await readCommentFields(replyEl));
    } catch {
      continue;
    }
  }

  return { ...topLevel, subComments: replies };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Read the text of the first element matching `selector` and convert it to a
 * number with `parseCountString`. When the element cannot be read, the text
 * defaults to '0'.
 *
 * @param page - The page to query.
 * @param selector - Selector of the counter element.
 */
async function extractCount(page: Page, selector: string): Promise<number> {
  let rawText: string;
  try {
    rawText = await page.$eval(selector, (el) => el.textContent?.trim() ?? '0');
  } catch {
    rawText = '0';
  }
  return parseCountString(rawText);
}
|
||||
@@ -0,0 +1,401 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import type { Feed } from './types.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** URL of the explore (home) page the feed list is read from. */
const EXPLORE_URL = 'https://www.xiaohongshu.com/explore';

/** Logger tagged with this module's name. */
const log = logger.child({ module: 'xhs-feeds' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// __INITIAL_STATE__ raw types (partial — only the fields we care about)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * One entry of a `feeds` array inside `__INITIAL_STATE__`. Fields may be
 * flat on the item or nested under `noteCard`; all are optional.
 */
interface RawFeedItem {
  // Identity and access token
  id?: string;
  noteId?: string;
  note_id?: string;
  xsecToken?: string;
  xsec_token?: string;

  // Text
  displayTitle?: string;
  display_title?: string;
  title?: string;
  desc?: string;
  description?: string;

  // Kind
  type?: string;
  model_type?: string;

  // Nested card, cover and author
  noteCard?: RawNoteCard;
  cover?: RawImage;
  user?: RawUser;

  // Like counter: inside an interaction object or directly on the item
  interactInfo?: RawInteractInfo;
  interact_info?: RawInteractInfo;
  likedCount?: string;
  liked_count?: string;
}
|
||||
|
||||
/** Nested card form of a feed item; all fields are optional. */
interface RawNoteCard {
  noteId?: string;
  xsecToken?: string;
  xsec_token?: string;

  displayTitle?: string;
  display_title?: string;
  title?: string;
  desc?: string;
  type?: string;

  cover?: RawImage;
  user?: RawUser;

  interactInfo?: RawInteractInfo;
  interact_info?: RawInteractInfo;
}
|
||||
|
||||
/** Raw cover image; the URL may be carried by any one of these fields. */
interface RawImage {
  url?: string;
  urlPre?: string;
  url_pre?: string;
  urlDefault?: string;
  url_default?: string;
  infoList?: { url?: string }[];
  info_list?: { url?: string }[];
}
|
||||
|
||||
/** Raw author record; id, nickname and avatar each have several spellings. */
interface RawUser {
  userId?: string;
  user_id?: string;

  nickname?: string;
  nick_name?: string;
  nickName?: string;

  avatar?: string;
  avatarUrl?: string;
  avatar_url?: string;
}
|
||||
|
||||
/** Raw like counter in its four declared spellings. */
interface RawInteractInfo {
  likedCount?: string;
  liked_count?: string;
  likeCount?: string;
  like_count?: string;
}
|
||||
|
||||
/**
 * Partial shape of the page's `__INITIAL_STATE__` global: the three typed
 * locations a feed list may occupy, plus an untyped index signature for
 * every other key.
 */
interface InitialState {
  homeFeed?: { feeds?: RawFeedItem[] };
  feed?: { feeds?: RawFeedItem[] };
  explore?: { feeds?: RawFeedItem[] };
  [key: string]: unknown;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// listFeeds — extract feeds from the explore page
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Load the Xiaohongshu explore (home) page and return its feed list.
 *
 * Feeds are taken from the `__INITIAL_STATE__` global when it yields at
 * least one entry; otherwise the rendered DOM is scraped instead.
 *
 * @param page - A Playwright Page managed by BrowserManager.
 * @returns An array of Feed objects.
 */
export async function listFeeds(page: Page): Promise<Feed[]> {
  log.debug('Navigating to explore page');
  await page.goto(EXPLORE_URL, { waitUntil: 'domcontentloaded' });

  // Give client-side hydration a moment to settle.
  await page.waitForTimeout(2000);

  // Strategy 1: __INITIAL_STATE__.
  const initialState = await extractInitialState(page);
  const stateFeeds = initialState ? parseFeedsFromState(initialState) : [];
  if (stateFeeds.length > 0) {
    log.info({ count: stateFeeds.length }, 'Extracted feeds from __INITIAL_STATE__');
    return stateFeeds;
  }
  if (initialState) {
    log.debug('__INITIAL_STATE__ found but no feeds extracted, falling back to DOM');
  }

  // Strategy 2: DOM scraping.
  log.debug('Falling back to DOM scraping for feed list');
  const domFeeds = await scrapeFeedsFromDom(page);
  log.info({ count: domFeeds.length }, 'Extracted feeds from DOM');
  return domFeeds;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// __INITIAL_STATE__ extraction
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Read `window.__INITIAL_STATE__` from the page.
 *
 * The expression is evaluated as a string and the result is treated as
 * `unknown` until checked, so no DOM lib types are needed on the Node side.
 *
 * @param page - The page to read from.
 * @returns The state object, or null when evaluation fails or the value is
 *   falsy or not of type object.
 */
async function extractInitialState(page: Page): Promise<InitialState | null> {
  let state: unknown;
  try {
    state = await page.evaluate('window.__INITIAL_STATE__');
  } catch (err: unknown) {
    log.warn({ err }, 'Failed to extract __INITIAL_STATE__');
    return null;
  }

  if (!state || typeof state !== 'object') {
    log.debug('__INITIAL_STATE__ is not present or not an object');
    return null;
  }

  return state as InitialState;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Feed parsing from __INITIAL_STATE__
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Turn the raw initial state into structured Feed objects.
 *
 * The known locations (`homeFeed.feeds`, `feed.feeds`, `explore.feeds`) are
 * tried in that order. If none yields a feed, every top-level object with a
 * `feeds` array is tried in key order. A location is used only when it
 * produces at least one valid feed, so an empty or unparseable list does not
 * hide a populated one elsewhere.
 *
 * @param state - The raw `__INITIAL_STATE__` object.
 * @returns The feeds of the first location that yields any, else [].
 */
function parseFeedsFromState(state: InitialState): Feed[] {
  // The runtime payload is untyped, so anything that is not an array counts as empty.
  const toFeeds = (candidate: unknown): Feed[] =>
    Array.isArray(candidate)
      ? (candidate as RawFeedItem[])
          .map(parseRawFeedItem)
          .filter((f): f is Feed => f !== null)
      : [];

  // Known locations first.
  for (const candidate of [state.homeFeed?.feeds, state.feed?.feeds, state.explore?.feeds]) {
    const feeds = toFeeds(candidate);
    if (feeds.length > 0) return feeds;
  }

  // Otherwise walk the top-level keys for any object exposing a `feeds` array.
  for (const value of Object.values(state)) {
    if (value && typeof value === 'object' && !Array.isArray(value)) {
      const feeds = toFeeds((value as Record<string, unknown>)['feeds']);
      if (feeds.length > 0) return feeds;
    }
  }

  return [];
}
|
||||
|
||||
/**
 * Convert one raw feed item into a Feed.
 *
 * Each field is read from the item itself first and from its nested
 * `noteCard` second. Missing strings become '' and a missing like counter
 * becomes '0' before parsing.
 *
 * @param raw - The raw feed item.
 * @returns The feed, or null when the item carries no id.
 */
function parseRawFeedItem(raw: RawFeedItem): Feed | null {
  const card = raw.noteCard;

  // An id is the only hard requirement, so check it before anything else.
  const id = raw.id ?? raw.noteId ?? raw.note_id ?? card?.noteId ?? '';
  if (!id) {
    return null;
  }

  // A feed is a video if ANY type field says so. Taking only the first
  // defined field would let a top-level `model_type` hide `noteCard.type`.
  // NOTE(review): assumes `model_type` can hold a generic value - confirm.
  const isVideo = [raw.type, card?.type, raw.model_type].some(
    (candidate) => typeof candidate === 'string' && candidate.toLowerCase().includes('video'),
  );

  const rawUser = raw.user ?? card?.user;

  // Like count: inside an interaction object, or directly on the item.
  const interactInfo =
    raw.interactInfo ?? raw.interact_info ?? card?.interactInfo ?? card?.interact_info;
  const likeCountStr =
    interactInfo?.likedCount ??
    interactInfo?.liked_count ??
    interactInfo?.likeCount ??
    interactInfo?.like_count ??
    raw.likedCount ??
    raw.liked_count ??
    '0';

  return {
    id,
    xsecToken: raw.xsecToken ?? raw.xsec_token ?? card?.xsecToken ?? card?.xsec_token ?? '',
    title:
      raw.displayTitle ??
      raw.display_title ??
      raw.title ??
      card?.displayTitle ??
      card?.display_title ??
      card?.title ??
      '',
    description: raw.desc ?? raw.description ?? card?.desc ?? '',
    type: isVideo ? 'video' : 'normal',
    coverUrl: extractImageUrl(raw.cover ?? card?.cover),
    likeCount: parseCountString(likeCountStr),
    user: {
      id: rawUser?.userId ?? rawUser?.user_id ?? '',
      nickname: rawUser?.nickname ?? rawUser?.nick_name ?? rawUser?.nickName ?? '',
      avatar: rawUser?.avatar ?? rawUser?.avatarUrl ?? rawUser?.avatar_url ?? '',
    },
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// DOM scraping fallback — uses Playwright Node-side API exclusively
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * DOM fallback: build the feed list by reading the rendered `.note-item`
 * cards through Playwright's Node-side handles (`page.$$`, `handle.$eval`),
 * which avoids needing DOM lib types in our TypeScript config.
 *
 * A card is skipped when its `a.cover` link does not contain an
 * `/explore/<hex id>` segment. Every other field is best-effort: a missing
 * element yields an empty string (or a like count of 0) instead of an error.
 * `description` is always empty because the cards are not read for one here.
 *
 * @param page - Playwright page that is already showing a feed listing.
 * @returns Feeds in DOM order; an empty array when no card is rendered.
 */
async function scrapeFeedsFromDom(page: Page): Promise<Feed[]> {
  // Best-effort wait: on timeout we fall through and return whatever exists.
  await page.waitForSelector('.note-item', { timeout: 10_000 }).catch(() => null);

  const cardElements = await page.$$('.note-item');
  const feeds: Feed[] = [];

  for (const card of cardElements) {
    try {
      // The cover link carries both the note ID and the xsec_token.
      const href = await card
        .$eval('a.cover', (el) => el.getAttribute('href') ?? '')
        .catch(() => '');

      const id = href.match(/\/explore\/([a-f0-9]+)/)?.[1] ?? '';
      if (!id) continue;

      // The token sits in a query string, so it may be percent-encoded.
      // Decode it here: consumers such as buildFeedUrl() apply
      // encodeURIComponent() themselves, and an encoded token would end up
      // double-encoded. A malformed escape sequence keeps the raw value.
      const rawToken = href.match(/xsec_token=([^&]+)/)?.[1] ?? '';
      let xsecToken = rawToken;
      try {
        xsecToken = decodeURIComponent(rawToken);
      } catch {
        xsecToken = rawToken;
      }

      // Cover image — normalised like the state-based extraction path, since
      // the site may emit protocol-relative or plain-http URLs.
      const coverSrc = await card
        .$eval('a.cover img', (el) => el.getAttribute('src') ?? '')
        .catch(() => '');
      const coverUrl = ensureHttps(coverSrc);

      // Title
      const title = await card
        .$eval('.footer .title', (el) => el.textContent?.trim() ?? '')
        .catch(() => '');

      // Author name
      const nickname = await card
        .$eval('.footer .author-wrapper .name', (el) => el.textContent?.trim() ?? '')
        .catch(() => '');

      // Author avatar
      const avatar = await card
        .$eval('.footer .author-wrapper .author-head img', (el) => el.getAttribute('src') ?? '')
        .catch(() => '');

      // Author ID, taken from the profile link.
      const authorHref = await card
        .$eval('.footer .author-wrapper a', (el) => el.getAttribute('href') ?? '')
        .catch(() => '');
      const userId = authorHref.match(/\/user\/profile\/([a-f0-9]+)/)?.[1] ?? '';

      // Like count (may be abbreviated, e.g. "1.2w").
      const likeText = await card
        .$eval('.footer .like-wrapper .count', (el) => el.textContent?.trim() ?? '0')
        .catch(() => '0');
      const likeCount = parseCountString(likeText);

      // A play icon on the card marks a video note.
      const hasVideoIcon = await card
        .$('.play-icon')
        .then((el) => el !== null)
        .catch(() => false);

      feeds.push({
        id,
        xsecToken,
        title,
        description: '',
        type: hasVideoIcon ? 'video' : 'normal',
        coverUrl,
        likeCount,
        user: { id: userId, nickname, avatar },
      });
    } catch {
      // Skip cards that fail to parse.
      continue;
    }
  }

  return feeds;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Resolve the best available URL from a raw cover-image object.
 *
 * Precedence: `url`, `urlPre`, `urlDefault`, `url_pre`, `url_default`, then
 * the `url` of the first entry in `infoList` / `info_list`. The chosen value
 * is passed through `ensureHttps`.
 *
 * @param raw - Raw image object, or `undefined` when the item has no cover.
 * @returns The normalised URL, or an empty string when none is present.
 */
function extractImageUrl(raw: RawImage | undefined): string {
  if (!raw) return '';

  // First non-empty direct field wins.
  const direct =
    raw.url || raw.urlPre || raw.urlDefault || raw.url_pre || raw.url_default;
  if (direct) return ensureHttps(direct);

  // Otherwise fall back to the first image variant, if it carries a URL.
  const variants = raw.infoList ?? raw.info_list;
  const firstVariantUrl =
    variants && variants.length > 0 ? variants[0]?.url : undefined;

  return firstVariantUrl ? ensureHttps(firstVariantUrl) : '';
}
|
||||
|
||||
/**
 * Upgrade a URL to the https scheme.
 *
 * Protocol-relative URLs (`//host/...`) get an `https:` prefix and a leading
 * `http://` is swapped for `https://`. Anything else is returned unchanged.
 *
 * @param url - URL as received from the page or its embedded state.
 * @returns The URL with an https scheme where one could be applied.
 */
function ensureHttps(url: string): string {
  const insecurePrefix = 'http://';

  if (url.startsWith(insecurePrefix)) {
    return `https://${url.slice(insecurePrefix.length)}`;
  }

  return url.startsWith('//') ? `https:${url}` : url;
}
|
||||
|
||||
/**
 * Parse a display count such as "1,234", "1.2w", "3万" or "3k" into a number.
 *
 * Thousands separators are removed and the text is lower-cased. A `w` / `万`
 * anywhere in the string multiplies the leading number by 10,000; a `k` /
 * `千` multiplies it by 1,000. Without a suffix the leading integer is used.
 *
 * @param str - Raw count text.
 * @returns The parsed count, or 0 when the text is empty or has no leading
 *          number. This includes suffix-only input such as "w", which
 *          previously produced `NaN`.
 */
function parseCountString(str: string): number {
  if (!str) return 0;

  const cleaned = str.replace(/,/g, '').trim().toLowerCase();

  let multiplier = 1;
  if (cleaned.includes('w') || cleaned.includes('万')) {
    multiplier = 10_000;
  } else if (cleaned.includes('k') || cleaned.includes('千')) {
    multiplier = 1_000;
  }

  // Abbreviated counts may be fractional ("1.2w"); plain counts are integers.
  const value = multiplier === 1 ? parseInt(cleaned, 10) : parseFloat(cleaned);

  // Guard every branch so callers never receive NaN.
  return Number.isNaN(value) ? 0 : Math.round(value * multiplier);
}
|
||||
|
||||
// Re-export for use by other modules (search, user-profile, feed-detail)
|
||||
// that need the same extraction / parsing helpers.
|
||||
export { extractInitialState, parseCountString, ensureHttps };
|
||||
@@ -0,0 +1,537 @@
|
||||
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
||||
import type { Router } from 'express';
|
||||
|
||||
import type { BrowserManager } from '../../browser/manager.js';
|
||||
import { config } from '../../config/index.js';
|
||||
import { withErrorHandling } from '../../utils/errors.js';
|
||||
import { validateMediaPath } from '../../utils/downloader.js';
|
||||
import { checkLoginStatus, getLoginQRCode, deleteCookies } from './login.js';
|
||||
import { listFeeds } from './feeds.js';
|
||||
import { searchFeeds } from './search.js';
|
||||
import { getFeedDetail } from './feed-detail.js';
|
||||
import { getUserProfile } from './user-profile.js';
|
||||
import { publishImageNote } from './publish.js';
|
||||
import { publishVideoNote } from './publish-video.js';
|
||||
import { postComment, replyComment } from './comment.js';
|
||||
import { toggleLike, toggleFavorite } from './interaction.js';
|
||||
import { createXhsRoutes } from './routes.js';
|
||||
import {
|
||||
CheckLoginSchema,
|
||||
GetLoginQRCodeSchema,
|
||||
DeleteCookiesSchema,
|
||||
ListFeedsSchema,
|
||||
SearchSchema,
|
||||
GetFeedDetailSchema,
|
||||
GetUserProfileSchema,
|
||||
PublishImageSchema,
|
||||
PublishVideoSchema,
|
||||
PostCommentSchema,
|
||||
ReplyCommentSchema,
|
||||
LikeSchema,
|
||||
FavoriteSchema,
|
||||
} from './schemas.js';
|
||||
import type { SearchFilters } from './types.js';
|
||||
import type { PlatformPlugin } from '../../server/app.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const PLATFORM = 'xiaohongshu';
|
||||
|
||||
/** Maximum file size for video uploads (500 MB). */
|
||||
const VIDEO_MAX_SIZE_MB = 500;
|
||||
|
||||
/** Maximum file size for image uploads (20 MB — default in validateMediaPath). */
|
||||
const IMAGE_MAX_SIZE_MB = 20;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// PlatformPlugin implementation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Wrap a payload in the tool-result shape every handler below returns:
 * a single `text` content item holding the JSON-serialised payload.
 */
function toJsonResult(payload: unknown) {
  return {
    content: [{ type: 'text' as const, text: JSON.stringify(payload) }],
  };
}

/**
 * Look up the timeout for an operation: the operation's own entry in
 * `config.operationTimeouts`, then the `default` entry, then `fallbackMs`.
 */
function timeoutFor(operation: string, fallbackMs: number): number {
  return (
    config.operationTimeouts[operation] ??
    config.operationTimeouts['default'] ??
    fallbackMs
  );
}

/**
 * Xiaohongshu platform plugin: mounts the REST routes and registers the
 * thirteen `xhs_*` MCP tools. Each tool handler runs inside
 * `withErrorHandling` and answers with a JSON text result.
 */
export const xiaohongshuPlugin: PlatformPlugin = {
  name: PLATFORM,

  // =========================================================================
  // REST API routes (Phase 5)
  // =========================================================================

  /** Mount the Xiaohongshu REST router at the root of the given router. */
  registerRoutes(router: Router, browser: BrowserManager): void {
    router.use('/', createXhsRoutes(browser));
  },

  // =========================================================================
  // MCP tools
  // =========================================================================

  /** Register every Xiaohongshu MCP tool on the given server. */
  registerTools(server: McpServer, browser: BrowserManager): void {
    // =====================================================================
    // Phase 2: Login management (3 tools)
    // =====================================================================

    server.tool(
      'xhs_check_login',
      'Check Xiaohongshu login status',
      CheckLoginSchema,
      async () =>
        withErrorHandling('xhs_check_login', async () => {
          const timeoutMs = timeoutFor('login', 60_000);
          const status = await browser.withPage(
            PLATFORM,
            async (page) => checkLoginStatus(page),
            timeoutMs,
          );
          return toJsonResult(status);
        }),
    );

    server.tool(
      'xhs_get_login_qrcode',
      'Get Xiaohongshu login QR code (user scans with phone)',
      GetLoginQRCodeSchema,
      async () =>
        withErrorHandling('xhs_get_login_qrcode', async () => {
          // Receives the BrowserManager itself rather than a page.
          const qrCode = await getLoginQRCode(browser);
          return toJsonResult(qrCode);
        }),
    );

    server.tool(
      'xhs_delete_cookies',
      'Delete Xiaohongshu cookies and reset login session',
      DeleteCookiesSchema,
      async () =>
        withErrorHandling('xhs_delete_cookies', async () => {
          await deleteCookies(browser);
          return toJsonResult({ success: true, message: 'Cookies deleted' });
        }),
    );

    // =====================================================================
    // Phase 3: Content browsing (4 tools)
    // =====================================================================

    server.tool(
      'xhs_list_feeds',
      'Get Xiaohongshu explore page recommended feed list',
      ListFeedsSchema,
      async () =>
        withErrorHandling('xhs_list_feeds', async () => {
          const timeoutMs = timeoutFor('feed_list', 60_000);
          const feeds = await browser.withPage(
            PLATFORM,
            async (page) => listFeeds(page),
            timeoutMs,
          );
          return toJsonResult(feeds);
        }),
    );

    server.tool(
      'xhs_search',
      'Search Xiaohongshu notes by keyword with optional filters (sort, type, time range)',
      SearchSchema,
      async (args) =>
        withErrorHandling('xhs_search', async () => {
          const timeoutMs = timeoutFor('search', 60_000);

          // Forward only the three known filter fields, and only when the
          // caller supplied a filters object at all.
          let filters: SearchFilters | undefined;
          if (args.filters) {
            filters = {
              sort: args.filters.sort,
              type: args.filters.type,
              time: args.filters.time,
            };
          }

          const feeds = await browser.withPage(
            PLATFORM,
            async (page) => searchFeeds(page, args.keyword, filters),
            timeoutMs,
          );
          return toJsonResult(feeds);
        }),
    );

    server.tool(
      'xhs_get_feed_detail',
      'Get Xiaohongshu note detail including content, images, stats, and comments',
      GetFeedDetailSchema,
      async (args) =>
        withErrorHandling('xhs_get_feed_detail', async () => {
          const timeoutMs = timeoutFor('feed_detail', 60_000);
          const detail = await browser.withPage(
            PLATFORM,
            async (page) =>
              getFeedDetail(page, args.feed_id, args.xsec_token, args.load_all_comments),
            timeoutMs,
          );
          return toJsonResult(detail);
        }),
    );

    server.tool(
      'xhs_get_user_profile',
      'Get Xiaohongshu user profile information including bio, stats, and recent notes',
      GetUserProfileSchema,
      async (args) =>
        withErrorHandling('xhs_get_user_profile', async () => {
          const timeoutMs = timeoutFor('user_profile', 60_000);
          const profile = await browser.withPage(
            PLATFORM,
            async (page) => getUserProfile(page, args.user_id, args.xsec_token),
            timeoutMs,
          );
          return toJsonResult(profile);
        }),
    );

    // =====================================================================
    // Phase 4: Content publishing (2 tools)
    // =====================================================================

    server.tool(
      'xhs_publish_image',
      'Publish an image note on Xiaohongshu. Provide local file paths for images.',
      PublishImageSchema,
      async (args) =>
        withErrorHandling('xhs_publish_image', async () => {
          // Fail fast: every image path is validated, one at a time, BEFORE
          // a browser page is acquired.
          const imagePaths: string[] = [];
          for (const candidate of args.images) {
            imagePaths.push(
              await validateMediaPath(candidate, { maxSizeMB: IMAGE_MAX_SIZE_MB }),
            );
          }

          const timeoutMs = timeoutFor('publish', 300_000);
          const outcome = await browser.withPage(
            PLATFORM,
            async (page) =>
              publishImageNote(page, args.title, args.content, imagePaths, {
                tags: args.tags,
                scheduleAt: args.schedule_at,
                isOriginal: args.is_original,
                visibility: args.visibility,
              }),
            timeoutMs,
          );
          return toJsonResult(outcome);
        }),
    );

    server.tool(
      'xhs_publish_video',
      'Publish a video note on Xiaohongshu. Provide a local file path for the video.',
      PublishVideoSchema,
      async (args) =>
        withErrorHandling('xhs_publish_video', async () => {
          // Fail fast: validate the video path BEFORE acquiring a browser page.
          const videoPath = await validateMediaPath(args.video, {
            maxSizeMB: VIDEO_MAX_SIZE_MB,
          });

          const timeoutMs = timeoutFor('publish', 300_000);
          const outcome = await browser.withPage(
            PLATFORM,
            async (page) =>
              publishVideoNote(page, args.title, args.content, videoPath, {
                tags: args.tags,
                scheduleAt: args.schedule_at,
                visibility: args.visibility,
              }),
            timeoutMs,
          );
          return toJsonResult(outcome);
        }),
    );

    // =====================================================================
    // Phase 4: Interactions (4 tools)
    // =====================================================================

    server.tool(
      'xhs_post_comment',
      'Post a comment on a Xiaohongshu note',
      PostCommentSchema,
      async (args) =>
        withErrorHandling('xhs_post_comment', async () => {
          const timeoutMs = timeoutFor('comment', 20_000);
          const outcome = await browser.withPage(
            PLATFORM,
            async (page) =>
              postComment(page, args.feed_id, args.xsec_token, args.content),
            timeoutMs,
          );
          return toJsonResult(outcome);
        }),
    );

    server.tool(
      'xhs_reply_comment',
      'Reply to a comment on a Xiaohongshu note',
      ReplyCommentSchema,
      async (args) =>
        withErrorHandling('xhs_reply_comment', async () => {
          const timeoutMs = timeoutFor('reply', 20_000);
          const outcome = await browser.withPage(
            PLATFORM,
            async (page) =>
              replyComment(
                page,
                args.feed_id,
                args.xsec_token,
                args.content,
                args.comment_id,
                args.user_id,
              ),
            timeoutMs,
          );
          return toJsonResult(outcome);
        }),
    );

    server.tool(
      'xhs_like',
      'Like or unlike a Xiaohongshu note',
      LikeSchema,
      async (args) =>
        withErrorHandling('xhs_like', async () => {
          const timeoutMs = timeoutFor('like', 15_000);
          const outcome = await browser.withPage(
            PLATFORM,
            async (page) =>
              toggleLike(page, args.feed_id, args.xsec_token, args.unlike),
            timeoutMs,
          );
          return toJsonResult(outcome);
        }),
    );

    server.tool(
      'xhs_favorite',
      'Favorite or unfavorite a Xiaohongshu note',
      FavoriteSchema,
      async (args) =>
        withErrorHandling('xhs_favorite', async () => {
          const timeoutMs = timeoutFor('favorite', 15_000);
          const outcome = await browser.withPage(
            PLATFORM,
            async (page) =>
              toggleFavorite(page, args.feed_id, args.xsec_token, args.unfavorite),
            timeoutMs,
          );
          return toJsonResult(outcome);
        }),
    );
  },
};
|
||||
@@ -0,0 +1,214 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import { XHS_SELECTORS } from './selectors.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const FEED_DETAIL_URL = 'https://www.xiaohongshu.com/explore';
|
||||
|
||||
/** Wait after clicking like/favorite to let the state update. */
|
||||
const TOGGLE_SETTLE_MS = 1_000;
|
||||
|
||||
const selInteraction = XHS_SELECTORS.interaction;
|
||||
const selDetail = XHS_SELECTORS.feedDetail;
|
||||
|
||||
const log = logger.child({ module: 'xhs-interaction' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// toggleLike
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Bring a note's like state to the requested value.
 *
 * Opens the note's detail page, reads the current state from the presence of
 * the "active" like selector, clicks the like button only when the state
 * differs from the requested one, then re-reads the state to verify.
 *
 * @param page - Playwright Page managed by BrowserManager.
 * @param feedId - The note / feed ID.
 * @param xsecToken - Security token for accessing the feed page.
 * @param unlike - When true the note should end up NOT liked. Default: false.
 * @returns `success` tells whether the final state matches the request;
 *          `liked` is the state observed (or already satisfied) at the end.
 */
export async function toggleLike(
  page: Page,
  feedId: string,
  xsecToken: string,
  unlike?: boolean,
): Promise<{ success: boolean; liked: boolean }> {
  // The state the caller wants the note to end up in.
  const wantLiked = !unlike;

  log.info({ feedId, unlike: unlike ?? false }, 'Toggling like on note');

  // 1. Open the detail page and wait for the note container to render.
  await page.goto(buildFeedUrl(feedId, xsecToken), { waitUntil: 'domcontentloaded' });
  await page.waitForSelector(selDetail.noteContainer, { timeout: 10_000 });
  await page.waitForTimeout(1_000);

  // 2. Read the current state.
  const likedBefore = await isElementActive(page, selInteraction.likeButtonActive);
  log.debug(
    { isCurrentlyLiked: likedBefore, desiredUnlike: unlike ?? false },
    'Current like state',
  );

  // Already where the caller wants it — no click needed.
  if (likedBefore === wantLiked) {
    log.info(
      { feedId, liked: wantLiked, alreadyInState: true },
      'Like already in desired state',
    );
    return { success: true, liked: wantLiked };
  }

  // 3. Click the like button.
  const button = await page.$(selInteraction.likeButton);
  if (button === null) {
    log.warn('Like button not found on feed detail page');
    return { success: false, liked: likedBefore };
  }

  await button.click();
  await page.waitForTimeout(TOGGLE_SETTLE_MS);

  // 4. Verify the state after the click.
  const likedAfter = await isElementActive(page, selInteraction.likeButtonActive);
  const success = likedAfter === wantLiked;

  log.info({ feedId, liked: likedAfter, success }, 'Like toggle complete');

  return { success, liked: likedAfter };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// toggleFavorite
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Bring a note's favorite state to the requested value.
 *
 * Opens the note's detail page, reads the current state from the presence of
 * the "active" favorite selector, clicks the favorite button only when the
 * state differs from the requested one, then re-reads the state to verify.
 *
 * @param page - Playwright Page managed by BrowserManager.
 * @param feedId - The note / feed ID.
 * @param xsecToken - Security token for accessing the feed page.
 * @param unfavorite - When true the note should end up NOT favorited. Default: false.
 * @returns `success` tells whether the final state matches the request;
 *          `favorited` is the state observed (or already satisfied) at the end.
 */
export async function toggleFavorite(
  page: Page,
  feedId: string,
  xsecToken: string,
  unfavorite?: boolean,
): Promise<{ success: boolean; favorited: boolean }> {
  // The state the caller wants the note to end up in.
  const wantFavorited = !unfavorite;

  log.info({ feedId, unfavorite: unfavorite ?? false }, 'Toggling favorite on note');

  // 1. Open the detail page and wait for the note container to render.
  await page.goto(buildFeedUrl(feedId, xsecToken), { waitUntil: 'domcontentloaded' });
  await page.waitForSelector(selDetail.noteContainer, { timeout: 10_000 });
  await page.waitForTimeout(1_000);

  // 2. Read the current state.
  const favoritedBefore = await isElementActive(page, selInteraction.favoriteButtonActive);
  log.debug(
    { isCurrentlyFavorited: favoritedBefore, desiredUnfavorite: unfavorite ?? false },
    'Current favorite state',
  );

  // Already where the caller wants it — no click needed.
  if (favoritedBefore === wantFavorited) {
    log.info(
      { feedId, favorited: wantFavorited, alreadyInState: true },
      'Favorite already in desired state',
    );
    return { success: true, favorited: wantFavorited };
  }

  // 3. Click the favorite button.
  const button = await page.$(selInteraction.favoriteButton);
  if (button === null) {
    log.warn('Favorite button not found on feed detail page');
    return { success: false, favorited: favoritedBefore };
  }

  await button.click();
  await page.waitForTimeout(TOGGLE_SETTLE_MS);

  // 4. Verify the state after the click.
  const favoritedAfter = await isElementActive(page, selInteraction.favoriteButtonActive);
  const success = favoritedAfter === wantFavorited;

  log.info({ feedId, favorited: favoritedAfter, success }, 'Favorite toggle complete');

  return { success, favorited: favoritedAfter };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Compose the detail-page URL for a note.
 *
 * The token is URI-encoded; the feed ID is inserted into the path as given.
 * `xsec_source` is always `pc_search`.
 */
function buildFeedUrl(feedId: string, xsecToken: string): string {
  const query = `xsec_token=${encodeURIComponent(xsecToken)}&xsec_source=pc_search`;
  return `${FEED_DETAIL_URL}/${feedId}?${query}`;
}
|
||||
|
||||
/**
 * Report whether at least one element on the page matches `selector`.
 *
 * Callers pass the "active" variant of a like/favorite selector (e.g.
 * `.like-wrapper.active`), so a match means the button is toggled on.
 */
async function isElementActive(page: Page, selector: string): Promise<boolean> {
  return (await page.$(selector)) !== null;
}
|
||||
@@ -0,0 +1,186 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import type { BrowserManager } from '../../browser/manager.js';
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import { cookieStore } from '../../cookie/store.js';
|
||||
import { XHS_SELECTORS } from './selectors.js';
|
||||
import type { LoginStatus, QRCodeResult } from './types.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const PLATFORM = 'xiaohongshu';
|
||||
const EXPLORE_URL = 'https://www.xiaohongshu.com/explore';
|
||||
|
||||
/** How long to wait for the user to scan the QR code (4 minutes). */
|
||||
const QR_SCAN_TIMEOUT_MS = 4 * 60 * 1000;
|
||||
|
||||
const log = logger.child({ module: 'xhs-login' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// checkLoginStatus
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Load the explore page and report whether a session is logged in, judged by
 * whether the logged-in indicator element appears within five seconds.
 *
 * @param page - A Playwright Page already managed by the caller.
 * @returns `{ loggedIn: false }` when the indicator never appears; otherwise
 *          `{ loggedIn: true }`, plus `username` (the indicator's trimmed
 *          text) when that text could be read and is non-empty.
 */
export async function checkLoginStatus(page: Page): Promise<LoginStatus> {
  await page.goto(EXPLORE_URL, { waitUntil: 'domcontentloaded' });

  // A timeout here is the normal "not logged in" signal, not an error.
  const indicator = await page
    .waitForSelector(XHS_SELECTORS.login.loggedInIndicator, { timeout: 5_000 })
    .catch(() => null);
  if (!indicator) {
    return { loggedIn: false };
  }

  // The username is best-effort: a failed read only omits the field.
  const indicatorText = await indicator.textContent().catch(() => null);
  if (!indicatorText) {
    return { loggedIn: true };
  }

  return { loggedIn: true, username: indicatorText.trim() };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// getLoginQRCode
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Open the explore page, bring up the login modal if necessary, and return
 * the `src` of the QR code image.
 *
 * Scanning takes an indeterminate amount of time, so the page is obtained
 * with `acquirePage` rather than `withPage`. Once the QR code has been read,
 * ownership of the page passes to a fire-and-forget background task
 * (`waitForLoginAndRelease`) that waits for the scan, saves cookies, and
 * releases the page. On every other exit path (already logged in, or any
 * failure before the hand-off) this function releases the page itself,
 * exactly once.
 *
 * @param browser - The shared BrowserManager instance.
 * @returns QR code data, or a result flagged `alreadyLoggedIn` when no scan
 *          is needed.
 * @throws If navigation fails, the QR code image does not appear within ten
 *         seconds, or its `src` attribute is empty.
 */
export async function getLoginQRCode(
  browser: BrowserManager,
): Promise<QRCodeResult> {
  const { page, release } = await browser.acquirePage(PLATFORM);

  // Becomes true once the background task owns the page; until then the
  // `finally` below is the single place where the page is released.
  let handedOff = false;

  try {
    await page.goto(EXPLORE_URL, { waitUntil: 'domcontentloaded' });

    // Check whether the user is already logged in.
    const alreadyLoggedIn = await page
      .waitForSelector(XHS_SELECTORS.login.loggedInIndicator, { timeout: 3_000 })
      .then(() => true)
      .catch(() => false);

    if (alreadyLoggedIn) {
      return { qrcodeData: '', alreadyLoggedIn: true, timeout: '0' };
    }

    // If the QR code is not visible yet, click the login button to open it.
    const qrVisible = await page
      .waitForSelector(XHS_SELECTORS.login.qrCodeImage, { timeout: 3_000 })
      .then(() => true)
      .catch(() => false);

    if (!qrVisible) {
      const loginBtn = await page
        .waitForSelector(XHS_SELECTORS.login.loginButton, { timeout: 5_000 })
        .catch(() => null);

      if (loginBtn) {
        await loginBtn.click();
      }
    }

    // Wait for the QR code image to appear; a timeout here propagates.
    const qrElement = await page.waitForSelector(
      XHS_SELECTORS.login.qrCodeImage,
      { timeout: 10_000 },
    );

    const qrcodeData = await qrElement.getAttribute('src');

    if (!qrcodeData) {
      throw new Error('QR code image src attribute is empty');
    }

    // Hand the page over to the background waiter. The `.catch()` ensures no
    // unhandled rejection escapes from the detached promise.
    handedOff = true;
    waitForLoginAndRelease(page, browser, release).catch((err: unknown) => {
      log.error({ err }, 'Login wait flow encountered an unexpected error');
    });

    return {
      qrcodeData,
      alreadyLoggedIn: false,
      // Human-readable form of QR_SCAN_TIMEOUT_MS; keep the two in sync.
      timeout: '4m',
    };
  } finally {
    if (!handedOff) {
      await release();
    }
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// deleteCookies
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Remove the cookies persisted for the Xiaohongshu platform.
 *
 * Only the cookie store is touched; the browser argument is not used.
 *
 * @param _browser - The shared BrowserManager instance. Currently unused and
 *                   accepted so the signature matches the other login
 *                   helpers.
 */
export async function deleteCookies(_browser: BrowserManager): Promise<void> {
  const removal = cookieStore.delete(PLATFORM);
  await removal;

  log.info('Xiaohongshu cookies deleted');
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal: waitForLoginAndRelease
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Background task that waits for the logged-in indicator to appear (meaning
 * the user has scanned the QR code) and then persists cookies.
 *
 * The two phases are handled separately so that a cookie-save failure after
 * a successful login is reported as an error instead of being mistaken for
 * "the user never scanned". The page is released in all cases (success,
 * timeout, error) via `finally`.
 *
 * The original notes state that `release` is idempotent (guaranteed by
 * BrowserManager.acquirePage), so a concurrent release by acquirePage's
 * safety-net timer does not cause a double-close.
 *
 * @param page    - The page currently showing the QR code.
 * @param browser - BrowserManager used to persist cookies after login.
 * @param release - Callback that gives the page back to the manager.
 * @returns Resolves once the page has been released. Wait and save failures
 *          are logged, not thrown; only a failing `release` rejects.
 */
async function waitForLoginAndRelease(
  page: Page,
  browser: BrowserManager,
  release: () => Promise<void>,
): Promise<void> {
  try {
    try {
      await page.waitForSelector(XHS_SELECTORS.login.loggedInIndicator, {
        timeout: QR_SCAN_TIMEOUT_MS,
      });
    } catch {
      // Timeout or page closed — not an error, just means the user did not
      // scan in time (or the page was released by the safety timer).
      log.debug('Login wait ended without successful scan');
      return;
    }

    log.info('QR code scanned — login detected, saving cookies');

    try {
      await browser.saveCookies(PLATFORM);
    } catch (err: unknown) {
      // The login itself worked; surface the persistence failure loudly.
      log.error({ err }, 'Login detected but saving cookies failed');
    }
  } finally {
    await release();
  }
}
|
||||
@@ -0,0 +1,313 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import { XHS_SELECTORS } from './selectors.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Creator-center page that hosts the publish form. */
const CREATOR_PUBLISH_URL = 'https://creator.xiaohongshu.com/publish/publish';

/**
 * Ceiling on the wait for the uploaded-video element: 4 minutes, because
 * videos can be as large as 500 MB.
 */
const VIDEO_UPLOAD_TIMEOUT_MS = 4 * 60 * 1_000;

/** Pause after the upload completes so the UI can settle. */
const UPLOAD_SETTLE_MS = 2_000;

/** Pause after filling a form field. */
const FIELD_SETTLE_MS = 500;

/** Pause after clicking publish, before the result is inspected. */
const PUBLISH_SETTLE_MS = 3_000;

/** Shorthand for the publish-page selector group. */
const sel = XHS_SELECTORS.publish;

/** Logger for this module; entries are tagged with `module: 'xhs-publish-video'`. */
const log = logger.child({ module: 'xhs-publish-video' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// publishVideoNote
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Optional settings accepted by `publishVideoNote`. */
export interface PublishVideoOptions {
  /** Topic tags; each one is typed into the content editor as `#<tag>`. */
  tags?: string[];

  /** Publish time, typed verbatim into the schedule input. */
  scheduleAt?: string;

  /**
   * Visibility to select. Unset or `'public'` leaves the form untouched;
   * `'private'` and `'friends'` pick the matching option.
   */
  visibility?: string;
}
|
||||
|
||||
/**
 * Drive the creator-center form to publish a video note on Xiaohongshu.
 *
 * The steps run strictly in order: open the publish page, upload the video,
 * type the title and body, apply the optional tags / visibility / schedule,
 * click publish, and wait for a success indicator.
 *
 * @param page      - Playwright Page managed by BrowserManager.
 * @param title     - Note title (required, non-empty).
 * @param content   - Note body / description text.
 * @param videoPath - Local path to the video file (already validated by caller).
 * @param options   - Optional tags, schedule, and visibility.
 * @returns Object indicating success and an optional noteId if detectable.
 */
export async function publishVideoNote(
  page: Page,
  title: string,
  content: string,
  videoPath: string,
  options?: PublishVideoOptions,
): Promise<{ success: boolean; noteId?: string }> {
  log.info({ hasOptions: Boolean(options) }, 'Starting video note publish');

  // Step 1: open the publish page, then give the SPA time to hydrate.
  await page.goto(CREATOR_PUBLISH_URL, { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(2_000);

  // Step 2: upload. The creator page shares one file input between images
  // and video; the platform works out the type from the uploaded content.
  const uploadInput = await page.waitForSelector(sel.imageFileInput, {
    timeout: 10_000,
  });
  await uploadInput.setInputFiles(videoPath);
  log.debug('Video file set on input element, waiting for upload to complete');

  // The uploaded-video element marks the end of the upload, which takes far
  // longer for video than for images.
  await page.waitForSelector(sel.uploadedVideoItem, {
    timeout: VIDEO_UPLOAD_TIMEOUT_MS,
  });
  await page.waitForTimeout(UPLOAD_SETTLE_MS);
  log.debug('Video uploaded successfully');

  // Step 3: title. Focus the field, clear it, then type key by key.
  const titleField = await page.waitForSelector(sel.titleInput, {
    timeout: 5_000,
  });
  await titleField.click();
  await titleField.fill('');
  await page.keyboard.type(title, { delay: 30 });
  await page.waitForTimeout(FIELD_SETTLE_MS);

  // Step 4: body text.
  const bodyEditor = await page.waitForSelector(sel.contentEditor, {
    timeout: 5_000,
  });
  await bodyEditor.click();
  await page.keyboard.type(content, { delay: 20 });
  await page.waitForTimeout(FIELD_SETTLE_MS);

  // Step 5: tags, only when at least one was supplied.
  const tags = options?.tags;
  if (tags?.length) {
    await addTags(page, tags);
  }

  // Step 6: visibility, skipped when unset or 'public'.
  const visibility = options?.visibility;
  if (visibility && visibility !== 'public') {
    await setVisibility(page, visibility);
  }

  // Step 7: scheduled publish time.
  const scheduleAt = options?.scheduleAt;
  if (scheduleAt) {
    await setSchedule(page, scheduleAt);
  }

  // Step 8: submit the form.
  const submitButton = await page.waitForSelector(sel.publishButton, {
    timeout: 5_000,
  });
  await submitButton.click();
  log.debug('Publish button clicked, waiting for success');

  // Step 9: wait for a success indicator and report the outcome.
  const outcome = await waitForPublishResult(page);
  log.info({ result: outcome }, 'Video note publish complete');

  return outcome;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal helpers (shared patterns with publish.ts — kept separate to
|
||||
// avoid circular imports and keep each module self-contained)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Append each tag to the note by typing `#<tag>` into the content editor.
 *
 * After typing, the first suggestion item is clicked if one appears within
 * three seconds; otherwise Enter is pressed instead.
 */
async function addTags(page: Page, tags: string[]): Promise<void> {
  for (const tagText of tags) {
    // Re-focus the editor before every tag, when it can be found.
    const editorHandle = await page.$(sel.contentEditor);
    if (editorHandle) {
      await editorHandle.click();
      await page.waitForTimeout(300);
    }

    await page.keyboard.type(`#${tagText}`, { delay: 50 });
    await page.waitForTimeout(800);

    const firstSuggestion = await page
      .waitForSelector(sel.tagSuggestionItem, { timeout: 3_000 })
      .catch(() => null);

    if (!firstSuggestion) {
      await page.keyboard.press('Enter');
    } else {
      await firstSuggestion.click();
    }

    await page.waitForTimeout(FIELD_SETTLE_MS);
  }
}
|
||||
|
||||
/**
 * Open the visibility picker and choose the option matching `visibility`.
 *
 * `'private'` and `'friends'` map to their own options; any other value
 * falls back to the public option. Missing UI elements are logged at debug
 * level and otherwise ignored.
 */
async function setVisibility(page: Page, visibility: string): Promise<void> {
  const pickerButton = await page.$(sel.visibilityButton);
  if (!pickerButton) {
    log.debug('Visibility button not found, skipping');
    return;
  }

  await pickerButton.click();
  await page.waitForTimeout(FIELD_SETTLE_MS);

  const targetSelector: string =
    visibility === 'private'
      ? sel.visibilityPrivate
      : visibility === 'friends'
        ? sel.visibilityFriends
        : sel.visibilityPublic;

  const choice = await page.$(targetSelector);
  if (!choice) {
    log.debug({ visibility }, 'Visibility option not found');
    return;
  }

  await choice.click();
  await page.waitForTimeout(FIELD_SETTLE_MS);
}
|
||||
|
||||
/**
 * Open the schedule picker and type the requested publish time into it.
 *
 * The value is typed verbatim and confirmed with Enter. If the schedule
 * button or input cannot be found, the step is logged and skipped.
 */
async function setSchedule(page: Page, scheduleAt: string): Promise<void> {
  const openPicker = await page.$(sel.scheduleButton);
  if (!openPicker) {
    log.debug('Schedule button not found, skipping');
    return;
  }

  await openPicker.click();
  await page.waitForTimeout(FIELD_SETTLE_MS);

  const timeField = await page.$(sel.scheduleInput);
  if (!timeField) {
    log.debug('Schedule input not found');
    return;
  }

  // Focus, clear, type, then confirm with Enter.
  await timeField.click();
  await timeField.fill('');
  await page.keyboard.type(scheduleAt, { delay: 30 });
  await page.keyboard.press('Enter');
  await page.waitForTimeout(FIELD_SETTLE_MS);
}
|
||||
|
||||
/**
 * Wait for evidence that the video note was published.
 *
 * Two indicators are watched concurrently, each for up to 30 seconds: the
 * URL matching the success pattern, and the success element appearing. The
 * wait ends as soon as EITHER indicator is seen. It only runs to the full
 * timeout when neither has fired, so a publish that triggers just one of
 * them is not held up by the other's timeout. If both time out, the page
 * HTML is searched for the success texts as a last resort.
 *
 * @param page - The page on which the publish button was just clicked.
 * @returns `{ success: false }` when no indicator is found; otherwise
 *          `{ success: true }` with a `noteId` when one can be read from the
 *          current URL.
 */
async function waitForPublishResult(
  page: Page,
): Promise<{ success: boolean; noteId?: string }> {
  // Both waiters swallow their own timeout and resolve to a plain boolean,
  // so neither can produce an unhandled rejection.
  const urlChanged = page
    .waitForURL(sel.publishSuccessUrlPattern, { timeout: 30_000 })
    .then(() => true)
    .catch(() => false);

  const elementAppeared = page
    .waitForSelector(sel.publishSuccess, { timeout: 30_000 })
    .then(() => true)
    .catch(() => false);

  // Baseline pause so the click has time to be processed.
  await page.waitForTimeout(PUBLISH_SETTLE_MS);

  // Each branch resolves to true as soon as its own waiter succeeds, and
  // otherwise defers to the other waiter. The race therefore yields true on
  // the first positive signal and false only after both have failed.
  const indicatorSeen = await Promise.race([
    urlChanged.then((ok) => ok || elementAppeared),
    elementAppeared.then((ok) => ok || urlChanged),
  ]);

  if (!indicatorSeen) {
    const pageContent = await page.content();
    const hasSuccessText =
      pageContent.includes('发布成功') || pageContent.includes('已发布');

    if (!hasSuccessText) {
      log.warn('No success indicator found after video publish');
      return { success: false };
    }
  }

  const noteId = extractNoteIdFromUrl(page.url());

  return { success: true, noteId };
}
|
||||
|
||||
/**
 * Pull a note ID out of a post-publish URL, if one is present.
 *
 * A non-empty `noteId` query parameter wins; failing that, a
 * `/note/<id>` path segment made of lowercase hex characters is used.
 *
 * @param url - The URL to inspect.
 * @returns The note ID, or `undefined` when none is found or the URL cannot
 *          be parsed.
 */
function extractNoteIdFromUrl(url: string): string | undefined {
  let target: URL;
  try {
    target = new URL(url);
  } catch {
    // Unparseable URL: the note ID is optional, so report nothing.
    return undefined;
  }

  const fromQuery = target.searchParams.get('noteId');
  if (fromQuery) {
    return fromQuery;
  }

  const fromPath = /\/note\/([a-f0-9]+)/.exec(target.pathname)?.[1];
  return fromPath ? fromPath : undefined;
}
|
||||
@@ -0,0 +1,375 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import { XHS_SELECTORS } from './selectors.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Creator-center page that hosts the publish form. */
const CREATOR_PUBLISH_URL = 'https://creator.xiaohongshu.com/publish/publish';

/** Ceiling on the wait for all image thumbnails to appear: 60 seconds. */
const UPLOAD_WAIT_TIMEOUT_MS = 60 * 1_000;

/** Delay between successive thumbnail-count checks. */
const UPLOAD_POLL_INTERVAL_MS = 500;

/** Pause after the uploads finish so the UI can settle. */
const UPLOAD_SETTLE_MS = 1_500;

/** Pause after filling a form field (lets debounce / auto-save settle). */
const FIELD_SETTLE_MS = 500;

/** Pause after clicking publish, before the result is inspected. */
const PUBLISH_SETTLE_MS = 3_000;

/** Shorthand for the publish-page selector group. */
const sel = XHS_SELECTORS.publish;

/** Logger for this module; entries are tagged with `module: 'xhs-publish'`. */
const log = logger.child({ module: 'xhs-publish' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// publishImageNote
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Optional settings accepted by `publishImageNote`. */
export interface PublishImageOptions {
  /** Topic tags; each one is typed into the content editor as `#<tag>`. */
  tags?: string[];

  /** Publish time, typed verbatim into the schedule input. */
  scheduleAt?: string;

  /** When true, the "original content" checkbox is ticked if present. */
  isOriginal?: boolean;

  /**
   * Visibility to select. Unset or `'public'` leaves the form untouched;
   * `'private'` and `'friends'` pick the matching option.
   */
  visibility?: string;
}
|
||||
|
||||
/**
 * Drive the creator-center form to publish an image note on Xiaohongshu.
 *
 * The steps run strictly in order: open the publish page, upload the images,
 * type the title and body, apply the optional tags / original flag /
 * visibility / schedule, click publish, and wait for a success indicator.
 *
 * @param page       - Playwright Page managed by BrowserManager.
 * @param title      - Note title (required, non-empty).
 * @param content    - Note body / description text.
 * @param imagePaths - Array of local file paths (already validated by caller).
 * @param options    - Optional tags, schedule, original flag, and visibility.
 * @returns Object indicating success and an optional noteId if detectable.
 */
export async function publishImageNote(
  page: Page,
  title: string,
  content: string,
  imagePaths: string[],
  options?: PublishImageOptions,
): Promise<{ success: boolean; noteId?: string }> {
  log.info(
    { imageCount: imagePaths.length, hasOptions: Boolean(options) },
    'Starting image note publish',
  );

  // Step 1: open the publish page, then give the SPA time to hydrate.
  await page.goto(CREATOR_PUBLISH_URL, { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(2_000);

  // Step 2: upload. setInputFiles accepts the whole list in one call.
  const uploadInput = await page.waitForSelector(sel.imageFileInput, {
    timeout: 10_000,
  });
  await uploadInput.setInputFiles(imagePaths);
  log.debug({ count: imagePaths.length }, 'Files set on input element');

  // One thumbnail per image is expected; poll until they are all there,
  // then let the UI settle.
  await waitForUploadedImages(page, imagePaths.length);
  await page.waitForTimeout(UPLOAD_SETTLE_MS);
  log.debug('All images uploaded successfully');

  // Step 3: title. Focus the field, clear it, then type key by key.
  const titleField = await page.waitForSelector(sel.titleInput, {
    timeout: 5_000,
  });
  await titleField.click();
  await titleField.fill('');
  await page.keyboard.type(title, { delay: 30 });
  await page.waitForTimeout(FIELD_SETTLE_MS);

  // Step 4: body text.
  const bodyEditor = await page.waitForSelector(sel.contentEditor, {
    timeout: 5_000,
  });
  await bodyEditor.click();
  await page.keyboard.type(content, { delay: 20 });
  await page.waitForTimeout(FIELD_SETTLE_MS);

  // Step 5: tags, only when at least one was supplied.
  const tags = options?.tags;
  if (tags?.length) {
    await addTags(page, tags);
  }

  // Step 6: original-content flag.
  if (options?.isOriginal) {
    await setOriginal(page);
  }

  // Step 7: visibility, skipped when unset or 'public'.
  const visibility = options?.visibility;
  if (visibility && visibility !== 'public') {
    await setVisibility(page, visibility);
  }

  // Step 8: scheduled publish time.
  const scheduleAt = options?.scheduleAt;
  if (scheduleAt) {
    await setSchedule(page, scheduleAt);
  }

  // Step 9: submit the form.
  const submitButton = await page.waitForSelector(sel.publishButton, {
    timeout: 5_000,
  });
  await submitButton.click();
  log.debug('Publish button clicked, waiting for success');

  // Step 10: wait for a success indicator and report the outcome.
  const outcome = await waitForPublishResult(page);
  log.info({ result: outcome }, 'Image note publish complete');

  return outcome;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Block until at least `expectedCount` uploaded-image thumbnails are on the
 * page, checking every `UPLOAD_POLL_INTERVAL_MS`.
 *
 * The count is taken with `page.$$` from the Node side, so no
 * browser-context DOM types are needed in the TypeScript configuration.
 *
 * @throws If the thumbnails have not all appeared before
 *         `UPLOAD_WAIT_TIMEOUT_MS` elapses.
 */
async function waitForUploadedImages(
  page: Page,
  expectedCount: number,
): Promise<void> {
  const giveUpAt = Date.now() + UPLOAD_WAIT_TIMEOUT_MS;

  for (;;) {
    if (Date.now() >= giveUpAt) {
      break;
    }

    const thumbnails = await page.$$(sel.uploadedImageItem);
    if (thumbnails.length >= expectedCount) {
      return;
    }

    await page.waitForTimeout(UPLOAD_POLL_INTERVAL_MS);
  }

  throw new Error(
    `Timed out waiting for ${String(expectedCount)} uploaded images after ${String(UPLOAD_WAIT_TIMEOUT_MS)}ms`,
  );
}
|
||||
|
||||
/**
 * Append each tag to the note by typing `#<tag>` into the content editor,
 * which is what brings up the topic dropdown.
 *
 * After typing, the first suggestion item is clicked if one appears within
 * three seconds; otherwise Enter is pressed instead.
 */
async function addTags(page: Page, tags: string[]): Promise<void> {
  for (const tagText of tags) {
    // Re-focus the editor before every tag, when it can be found, so the
    // keystrokes land in the right place.
    const editorHandle = await page.$(sel.contentEditor);
    if (editorHandle) {
      await editorHandle.click();
      await page.waitForTimeout(300);
    }

    // The `#` prefix triggers the topic dropdown.
    await page.keyboard.type(`#${tagText}`, { delay: 50 });
    await page.waitForTimeout(800);

    const firstSuggestion = await page
      .waitForSelector(sel.tagSuggestionItem, { timeout: 3_000 })
      .catch(() => null);

    if (!firstSuggestion) {
      await page.keyboard.press('Enter');
    } else {
      await firstSuggestion.click();
    }

    await page.waitForTimeout(FIELD_SETTLE_MS);
  }
}
|
||||
|
||||
/**
 * Tick the "original content" checkbox when it exists and is not yet
 * checked. A missing checkbox is logged at debug level and skipped.
 */
async function setOriginal(page: Page): Promise<void> {
  const originalBox = await page.$(sel.originalCheckbox);
  if (!originalBox) {
    log.debug('Original checkbox not found, skipping');
    return;
  }

  const alreadyTicked = await originalBox.isChecked();
  if (alreadyTicked) {
    return;
  }

  await originalBox.click();
  await page.waitForTimeout(FIELD_SETTLE_MS);
}
|
||||
|
||||
/**
 * Open the visibility picker and choose the option matching `visibility`.
 *
 * `'private'` and `'friends'` map to their own options; any other value
 * falls back to the public option. Missing UI elements are logged at debug
 * level and otherwise ignored.
 */
async function setVisibility(page: Page, visibility: string): Promise<void> {
  const pickerButton = await page.$(sel.visibilityButton);
  if (!pickerButton) {
    log.debug('Visibility button not found, skipping');
    return;
  }

  await pickerButton.click();
  await page.waitForTimeout(FIELD_SETTLE_MS);

  const targetSelector: string =
    visibility === 'private'
      ? sel.visibilityPrivate
      : visibility === 'friends'
        ? sel.visibilityFriends
        : sel.visibilityPublic;

  const choice = await page.$(targetSelector);
  if (!choice) {
    log.debug({ visibility }, 'Visibility option not found');
    return;
  }

  await choice.click();
  await page.waitForTimeout(FIELD_SETTLE_MS);
}
|
||||
|
||||
/**
 * Open the schedule picker and type the requested publish time into it.
 *
 * The value is typed verbatim and confirmed with Enter. If the schedule
 * button or input cannot be found, the step is logged and skipped.
 */
async function setSchedule(page: Page, scheduleAt: string): Promise<void> {
  const openPicker = await page.$(sel.scheduleButton);
  if (!openPicker) {
    log.debug('Schedule button not found, skipping');
    return;
  }

  await openPicker.click();
  await page.waitForTimeout(FIELD_SETTLE_MS);

  const timeField = await page.$(sel.scheduleInput);
  if (!timeField) {
    log.debug('Schedule input not found');
    return;
  }

  // Focus, clear, type, then confirm with Enter.
  await timeField.click();
  await timeField.fill('');
  await page.keyboard.type(scheduleAt, { delay: 30 });
  await page.keyboard.press('Enter');
  await page.waitForTimeout(FIELD_SETTLE_MS);
}
|
||||
|
||||
/**
 * Wait for evidence that the note was published.
 *
 * Two indicators are watched concurrently, each for up to 30 seconds: the
 * URL matching the success pattern, and the success element appearing. The
 * wait ends as soon as EITHER indicator is seen. It only runs to the full
 * timeout when neither has fired, so a publish that triggers just one of
 * them is not held up by the other's timeout. If both time out, the page
 * HTML is searched for the success texts as a last resort.
 *
 * @param page - The page on which the publish button was just clicked.
 * @returns `{ success: false }` when no indicator is found; otherwise
 *          `{ success: true }` with a `noteId` when one can be read from the
 *          current URL.
 */
async function waitForPublishResult(
  page: Page,
): Promise<{ success: boolean; noteId?: string }> {
  // Both waiters swallow their own timeout and resolve to a plain boolean,
  // so neither can produce an unhandled rejection.
  const urlChanged = page
    .waitForURL(sel.publishSuccessUrlPattern, { timeout: 30_000 })
    .then(() => true)
    .catch(() => false);

  const elementAppeared = page
    .waitForSelector(sel.publishSuccess, { timeout: 30_000 })
    .then(() => true)
    .catch(() => false);

  // Baseline pause so the click has time to be processed.
  await page.waitForTimeout(PUBLISH_SETTLE_MS);

  // Each branch resolves to true as soon as its own waiter succeeds, and
  // otherwise defers to the other waiter. The race therefore yields true on
  // the first positive signal and false only after both have failed.
  const indicatorSeen = await Promise.race([
    urlChanged.then((ok) => ok || elementAppeared),
    elementAppeared.then((ok) => ok || urlChanged),
  ]);

  if (!indicatorSeen) {
    // Final fallback: check if the page content indicates success.
    const pageContent = await page.content();
    const hasSuccessText =
      pageContent.includes('发布成功') || pageContent.includes('已发布');

    if (!hasSuccessText) {
      log.warn('No success indicator found after publish');
      return { success: false };
    }
  }

  // Try to extract the note ID from the current URL if available.
  const noteId = extractNoteIdFromUrl(page.url());

  return { success: true, noteId };
}
|
||||
|
||||
/**
 * Pull a note ID out of a post-publish URL, if one is present.
 *
 * Two URL shapes are recognised: a non-empty `noteId` query parameter
 * (e.g. `/publish/success?noteId=xxx`), and failing that a `/note/<id>`
 * path segment made of lowercase hex characters.
 *
 * @param url - The URL to inspect.
 * @returns The note ID, or `undefined` when none is found or the URL cannot
 *          be parsed.
 */
function extractNoteIdFromUrl(url: string): string | undefined {
  let target: URL;
  try {
    target = new URL(url);
  } catch {
    // Unparseable URL: the note ID is optional, so report nothing.
    return undefined;
  }

  const fromQuery = target.searchParams.get('noteId');
  if (fromQuery) {
    return fromQuery;
  }

  const fromPath = /\/note\/([a-f0-9]+)/.exec(target.pathname)?.[1];
  return fromPath ? fromPath : undefined;
}
|
||||
@@ -0,0 +1,590 @@
|
||||
import { Router } from 'express';
|
||||
import { z, ZodError } from 'zod';
|
||||
|
||||
import type { BrowserManager } from '../../browser/manager.js';
|
||||
import { config } from '../../config/index.js';
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import { classifyError, sanitizeErrorMessage } from '../../utils/errors.js';
|
||||
import { validateMediaPath } from '../../utils/downloader.js';
|
||||
import { rateLimiter } from '../../server/middleware.js';
|
||||
|
||||
import { checkLoginStatus, getLoginQRCode, deleteCookies } from './login.js';
|
||||
import { listFeeds } from './feeds.js';
|
||||
import { searchFeeds } from './search.js';
|
||||
import { getFeedDetail } from './feed-detail.js';
|
||||
import { getUserProfile } from './user-profile.js';
|
||||
import { publishImageNote } from './publish.js';
|
||||
import { publishVideoNote } from './publish-video.js';
|
||||
import { postComment, replyComment } from './comment.js';
|
||||
import { toggleLike, toggleFavorite } from './interaction.js';
|
||||
|
||||
import {
|
||||
SearchSchema,
|
||||
GetFeedDetailSchema,
|
||||
GetUserProfileSchema,
|
||||
PublishImageSchema,
|
||||
PublishVideoSchema,
|
||||
PostCommentSchema,
|
||||
ReplyCommentSchema,
|
||||
LikeSchema,
|
||||
FavoriteSchema,
|
||||
} from './schemas.js';
|
||||
|
||||
import type { SearchFilters } from './types.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Platform key passed as the first argument of every `browser.withPage()` call in this file. */
const PLATFORM = 'xiaohongshu';
|
||||
|
||||
/** Maximum file size for video uploads (500 MB); passed as `maxSizeMB` to `validateMediaPath()` by POST /publish/video. */
|
||||
const VIDEO_MAX_SIZE_MB = 500;
|
||||
|
||||
/** Maximum file size for image uploads (20 MB); passed as `maxSizeMB` to `validateMediaPath()` for each image of POST /publish/image. */
|
||||
const IMAGE_MAX_SIZE_MB = 20;
|
||||
|
||||
/** Child logger tagged with `module: 'xhs-routes'`; `handleError()` uses it to record failed requests. */
const log = logger.child({ module: 'xhs-routes' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Zod schemas for REST body validation
|
||||
//
|
||||
// The MCP schemas in schemas.ts are "shape" objects (plain objects with zod
|
||||
// fields). For REST validation we wrap them in z.object() where needed.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Request body of POST /search. `filters` is optional in the underlying shape. */
const SearchBodySchema = z.object({
|
||||
keyword: SearchSchema.keyword,
|
||||
filters: SearchSchema.filters,
|
||||
});
|
||||
|
||||
/** Request body of POST /feeds/detail. `load_all_comments` defaults to false when omitted. */
const FeedDetailBodySchema = z.object({
|
||||
feed_id: GetFeedDetailSchema.feed_id,
|
||||
xsec_token: GetFeedDetailSchema.xsec_token,
|
||||
load_all_comments: GetFeedDetailSchema.load_all_comments,
|
||||
});
|
||||
|
||||
/** Request body of POST /user/profile. */
const UserProfileBodySchema = z.object({
|
||||
user_id: GetUserProfileSchema.user_id,
|
||||
xsec_token: GetUserProfileSchema.xsec_token,
|
||||
});
|
||||
|
||||
/**
 * Request body of POST /publish/image. Each entry of `images` is checked with
 * `validateMediaPath()` by the route before a browser page is acquired.
 */
const PublishImageBodySchema = z.object({
|
||||
title: PublishImageSchema.title,
|
||||
content: PublishImageSchema.content,
|
||||
images: PublishImageSchema.images,
|
||||
tags: PublishImageSchema.tags,
|
||||
schedule_at: PublishImageSchema.schedule_at,
|
||||
is_original: PublishImageSchema.is_original,
|
||||
visibility: PublishImageSchema.visibility,
|
||||
});
|
||||
|
||||
/**
 * Request body of POST /publish/video. `video` is checked with
 * `validateMediaPath()` by the route before a browser page is acquired.
 */
const PublishVideoBodySchema = z.object({
|
||||
title: PublishVideoSchema.title,
|
||||
content: PublishVideoSchema.content,
|
||||
video: PublishVideoSchema.video,
|
||||
tags: PublishVideoSchema.tags,
|
||||
schedule_at: PublishVideoSchema.schedule_at,
|
||||
visibility: PublishVideoSchema.visibility,
|
||||
});
|
||||
|
||||
/** Request body of POST /comment. */
const PostCommentBodySchema = z.object({
|
||||
feed_id: PostCommentSchema.feed_id,
|
||||
xsec_token: PostCommentSchema.xsec_token,
|
||||
content: PostCommentSchema.content,
|
||||
});
|
||||
|
||||
/** Request body of POST /comment/reply. `comment_id` and `user_id` are optional in the underlying shape. */
const ReplyCommentBodySchema = z.object({
|
||||
feed_id: ReplyCommentSchema.feed_id,
|
||||
xsec_token: ReplyCommentSchema.xsec_token,
|
||||
content: ReplyCommentSchema.content,
|
||||
comment_id: ReplyCommentSchema.comment_id,
|
||||
user_id: ReplyCommentSchema.user_id,
|
||||
});
|
||||
|
||||
/** Request body of POST /like. `unlike` defaults to false when omitted. */
const LikeBodySchema = z.object({
|
||||
feed_id: LikeSchema.feed_id,
|
||||
xsec_token: LikeSchema.xsec_token,
|
||||
unlike: LikeSchema.unlike,
|
||||
});
|
||||
|
||||
/** Request body of POST /favorite. `unfavorite` defaults to false when omitted. */
const FavoriteBodySchema = z.object({
|
||||
feed_id: FavoriteSchema.feed_id,
|
||||
xsec_token: FavoriteSchema.xsec_token,
|
||||
unfavorite: FavoriteSchema.unfavorite,
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Standard JSON response helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Envelope sent when a REST call succeeds; `data` carries the action's result. */
type ApiSuccessResponse<T> = {
  success: true;
  data: T;
};

/** Envelope sent when a REST call fails; `error` carries a code and a message. */
type ApiErrorResponse = {
  success: false;
  error: { code: string; message: string };
};

/** Every JSON body produced by these routes is one of the two envelopes. */
type ApiResponse<T> = ApiSuccessResponse<T> | ApiErrorResponse;

/**
 * Wrap a result in the success envelope.
 *
 * @param data - Payload to return to the client.
 * @returns `{ success: true, data }`.
 */
function successResponse<T>(data: T): ApiSuccessResponse<T> {
  const envelope: ApiSuccessResponse<T> = { success: true, data };
  return envelope;
}

/**
 * Build the error envelope.
 *
 * @param code - Error code reported to the client.
 * @param message - Error description reported to the client.
 * @returns `{ success: false, error: { code, message } }`.
 */
function errorResponse(code: string, message: string): ApiErrorResponse {
  const error = { code, message };
  return { success: false, error };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Rate limiters
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Attached to the login status / QR code, feeds, search, feed detail and user profile routes.
// NOTE(review): presumably allows 60 requests per 60 000 ms window — confirm against rateLimiter() in server/middleware.
const readRateLimiter = rateLimiter({ windowMs: 60_000, maxRequests: 60 });
|
||||
// Attached to the cookie deletion, publish, comment, like and favorite routes; configured with a lower maxRequests (10).
const writeRateLimiter = rateLimiter({ windowMs: 60_000, maxRequests: 10 });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Route factory
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Resolve the `browser.withPage()` timeout for an operation.
 *
 * Lookup order: `config.operationTimeouts[operation]`, then
 * `config.operationTimeouts['default']`, then the route's own fallback.
 *
 * @param operation - Key into `config.operationTimeouts` (e.g. `'search'`).
 * @param fallbackMs - Value used when neither config entry is set.
 */
function resolveTimeoutMs(operation: string, fallbackMs: number): number {
  return (
    config.operationTimeouts[operation] ??
    config.operationTimeouts['default'] ??
    fallbackMs
  );
}

/**
 * Turn an async action into an Express handler.
 *
 * The action's resolved value is sent inside the success envelope. Anything
 * thrown — by the action or while writing the response — is routed to
 * `handleError()`. The promise is deliberately not returned to Express
 * (fire-and-forget), which is why failures must be handled here.
 *
 * @param action - Produces the response payload for a request.
 */
function jsonRoute<T>(
  action: (req: import('express').Request) => Promise<T>,
): import('express').RequestHandler {
  return (req, res) => {
    void (async () => {
      try {
        const payload: ApiResponse<T> = successResponse(await action(req));
        res.json(payload);
      } catch (err) {
        handleError(res, err);
      }
    })();
  };
}

/**
 * Create and return an Express Router with all Xiaohongshu REST API routes.
 *
 * Every handler calls the SAME action functions used by the MCP tools so
 * that business logic is never duplicated. Request bodies are validated with
 * the zod body schemas declared in this file before any browser page is
 * acquired; validation failures surface as ZodError and are answered by
 * `handleError()`.
 *
 * @param browser - Browser manager used to run page actions and login helpers.
 * @returns A router exposing the login, browsing, publishing and interaction endpoints.
 */
export function createXhsRoutes(browser: BrowserManager): Router {
  const router = Router();

  // =========================================================================
  // Login management
  // =========================================================================

  // GET /login/status
  router.get(
    '/login/status',
    readRateLimiter,
    jsonRoute(async () =>
      browser.withPage(
        PLATFORM,
        async (page) => checkLoginStatus(page),
        resolveTimeoutMs('login', 60_000),
      ),
    ),
  );

  // GET /login/qrcode
  router.get(
    '/login/qrcode',
    readRateLimiter,
    jsonRoute(async () => getLoginQRCode(browser)),
  );

  // DELETE /login/cookies
  router.delete(
    '/login/cookies',
    writeRateLimiter,
    jsonRoute(async () => {
      await deleteCookies(browser);
      return { message: 'Cookies deleted' };
    }),
  );

  // =========================================================================
  // Content browsing
  // =========================================================================

  // GET /feeds
  router.get(
    '/feeds',
    readRateLimiter,
    jsonRoute(async () =>
      browser.withPage(
        PLATFORM,
        async (page) => listFeeds(page),
        resolveTimeoutMs('feed_list', 60_000),
      ),
    ),
  );

  // POST /search
  router.post(
    '/search',
    readRateLimiter,
    jsonRoute(async (req) => {
      const body = SearchBodySchema.parse(req.body);

      const filters: SearchFilters | undefined = body.filters
        ? {
            sort: body.filters.sort,
            type: body.filters.type,
            time: body.filters.time,
          }
        : undefined;

      return browser.withPage(
        PLATFORM,
        async (page) => searchFeeds(page, body.keyword, filters),
        resolveTimeoutMs('search', 60_000),
      );
    }),
  );

  // POST /feeds/detail
  router.post(
    '/feeds/detail',
    readRateLimiter,
    jsonRoute(async (req) => {
      const body = FeedDetailBodySchema.parse(req.body);

      return browser.withPage(
        PLATFORM,
        async (page) =>
          getFeedDetail(
            page,
            body.feed_id,
            body.xsec_token,
            body.load_all_comments,
          ),
        resolveTimeoutMs('feed_detail', 60_000),
      );
    }),
  );

  // POST /user/profile
  router.post(
    '/user/profile',
    readRateLimiter,
    jsonRoute(async (req) => {
      const body = UserProfileBodySchema.parse(req.body);

      return browser.withPage(
        PLATFORM,
        async (page) => getUserProfile(page, body.user_id, body.xsec_token),
        resolveTimeoutMs('user_profile', 60_000),
      );
    }),
  );

  // =========================================================================
  // Content publishing
  // =========================================================================

  // POST /publish/image
  router.post(
    '/publish/image',
    writeRateLimiter,
    jsonRoute(async (req) => {
      const body = PublishImageBodySchema.parse(req.body);

      // Validate all image paths before acquiring a browser page. Sequential
      // on purpose: the first invalid path aborts the request.
      const validatedPaths: string[] = [];
      for (const imagePath of body.images) {
        const resolved = await validateMediaPath(imagePath, {
          maxSizeMB: IMAGE_MAX_SIZE_MB,
        });
        validatedPaths.push(resolved);
      }

      return browser.withPage(
        PLATFORM,
        async (page) =>
          publishImageNote(page, body.title, body.content, validatedPaths, {
            tags: body.tags,
            scheduleAt: body.schedule_at,
            isOriginal: body.is_original,
            visibility: body.visibility,
          }),
        resolveTimeoutMs('publish', 300_000),
      );
    }),
  );

  // POST /publish/video
  router.post(
    '/publish/video',
    writeRateLimiter,
    jsonRoute(async (req) => {
      const body = PublishVideoBodySchema.parse(req.body);

      // Validate the video path before acquiring a browser page.
      const validatedPath = await validateMediaPath(body.video, {
        maxSizeMB: VIDEO_MAX_SIZE_MB,
      });

      return browser.withPage(
        PLATFORM,
        async (page) =>
          publishVideoNote(page, body.title, body.content, validatedPath, {
            tags: body.tags,
            scheduleAt: body.schedule_at,
            visibility: body.visibility,
          }),
        resolveTimeoutMs('publish', 300_000),
      );
    }),
  );

  // =========================================================================
  // Interactions
  // =========================================================================

  // POST /comment
  router.post(
    '/comment',
    writeRateLimiter,
    jsonRoute(async (req) => {
      const body = PostCommentBodySchema.parse(req.body);

      return browser.withPage(
        PLATFORM,
        async (page) =>
          postComment(page, body.feed_id, body.xsec_token, body.content),
        resolveTimeoutMs('comment', 20_000),
      );
    }),
  );

  // POST /comment/reply
  router.post(
    '/comment/reply',
    writeRateLimiter,
    jsonRoute(async (req) => {
      const body = ReplyCommentBodySchema.parse(req.body);

      return browser.withPage(
        PLATFORM,
        async (page) =>
          replyComment(
            page,
            body.feed_id,
            body.xsec_token,
            body.content,
            body.comment_id,
            body.user_id,
          ),
        resolveTimeoutMs('reply', 20_000),
      );
    }),
  );

  // POST /like
  router.post(
    '/like',
    writeRateLimiter,
    jsonRoute(async (req) => {
      const body = LikeBodySchema.parse(req.body);

      return browser.withPage(
        PLATFORM,
        async (page) =>
          toggleLike(page, body.feed_id, body.xsec_token, body.unlike),
        resolveTimeoutMs('like', 15_000),
      );
    }),
  );

  // POST /favorite
  router.post(
    '/favorite',
    writeRateLimiter,
    jsonRoute(async (req) => {
      const body = FavoriteBodySchema.parse(req.body);

      return browser.withPage(
        PLATFORM,
        async (page) =>
          toggleFavorite(
            page,
            body.feed_id,
            body.xsec_token,
            body.unfavorite,
          ),
        resolveTimeoutMs('favorite', 15_000),
      );
    }),
  );

  return router;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Error handling helper
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Unified error handler for REST route handlers.
 *
 * - Response already started -> logged only, nothing further is written
 * - ZodError                 -> 400 with VALIDATION_ERROR
 * - Business errors          -> code from classifyError():
 *                               AUTH_REQUIRED 401, TIMEOUT 504, NETWORK 502
 * - Anything else            -> 500
 *
 * For non-validation errors the client-facing message is passed through
 * sanitizeErrorMessage(); the original error object is only written to the log.
 *
 * @param res - Express response of the failing request.
 * @param err - Whatever was thrown; non-Error values are wrapped in an Error.
 */
function handleError(res: import('express').Response, err: unknown): void {
  // Route handlers run as fire-and-forget promises. If a response has already
  // been started, writing again would throw here and surface as an unhandled
  // rejection, so only record the failure.
  if (res.headersSent) {
    log.warn({ err }, 'REST API handler error after response was sent');
    return;
  }

  if (err instanceof ZodError) {
    const issues = err.issues
      .map((issue) => {
        // Root-level issues (e.g. a missing request body) have an empty path;
        // emit the bare message instead of a dangling ": " prefix.
        const path = issue.path.join('.');
        return path ? `${path}: ${issue.message}` : issue.message;
      })
      .join('; ');
    res.status(400).json(errorResponse('VALIDATION_ERROR', issues));
    return;
  }

  const error = err instanceof Error ? err : new Error(String(err));
  const category = classifyError(error);
  const message = sanitizeErrorMessage(error.message);

  log.error({ err: error, category }, 'REST API handler error');

  let statusCode: number;
  switch (category) {
    case 'AUTH_REQUIRED':
      statusCode = 401;
      break;
    case 'TIMEOUT':
      statusCode = 504;
      break;
    case 'NETWORK':
      statusCode = 502;
      break;
    default:
      statusCode = 500;
      break;
  }

  res.status(statusCode).json(errorResponse(category, message));
}
|
||||
@@ -0,0 +1,148 @@
|
||||
import { z } from 'zod';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// MCP tool parameter schemas for all 13 Xiaohongshu tools.
|
||||
//
|
||||
// Phase 2 tools (login) have no parameters — their schemas are empty objects.
|
||||
// Phase 3/4 schemas are defined here so that the full tool surface is
|
||||
// established upfront and types can be inferred with z.infer<>.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// -- Phase 2: Login management (3 tools) -----------------------------------
|
||||
|
||||
/** Parameter shape of the `xhs_check_login` tool — empty because the tool takes no parameters. */
|
||||
export const CheckLoginSchema = {};
|
||||
|
||||
/** Parameter shape of the `xhs_get_login_qrcode` tool — empty because the tool takes no parameters. */
|
||||
export const GetLoginQRCodeSchema = {};
|
||||
|
||||
/** Parameter shape of the `xhs_delete_cookies` tool — empty because the tool takes no parameters. */
|
||||
export const DeleteCookiesSchema = {};
|
||||
|
||||
// -- Phase 3: Content browsing (4 tools) -----------------------------------
|
||||
|
||||
/** Parameter shape of the `xhs_list_feeds` tool — empty because the tool takes no parameters. */
|
||||
export const ListFeedsSchema = {};
|
||||
|
||||
// Individual search filters, declared separately so the SearchSchema shape
// below stays flat and readable. Every filter may be omitted.
const searchSortFilter = z
  .enum(['general', 'time_descending', 'popularity_descending'])
  .optional()
  .describe('Sort order');

const searchTypeFilter = z
  .enum(['all', 'note', 'video'])
  .optional()
  .describe('Content type filter');

const searchTimeFilter = z
  .enum(['all', 'day', 'week', 'half_year'])
  .optional()
  .describe('Time range filter');

/** Parameter shape of `xhs_search`: a keyword plus an optional filter object. */
export const SearchSchema = {
  keyword: z.string().describe('Search keyword'),
  filters: z
    .object({
      sort: searchSortFilter,
      type: searchTypeFilter,
      time: searchTimeFilter,
    })
    .optional()
    .describe('Optional search filters'),
};

/**
 * Parameter shape of `xhs_get_feed_detail`.
 * `load_all_comments` resolves to `false` when the caller omits it.
 */
export const GetFeedDetailSchema = {
  feed_id: z.string().describe('Feed (note) ID'),
  xsec_token: z.string().describe('Security token for the feed'),
  load_all_comments: z
    .boolean()
    .optional()
    .default(false)
    .describe('Whether to scroll and load all comments'),
};

/** Parameter shape of `xhs_get_user_profile`. */
export const GetUserProfileSchema = {
  user_id: z.string().describe('User ID'),
  xsec_token: z.string().describe('Security token for the user page'),
};
|
||||
|
||||
// -- Phase 4: Content publishing (2 tools) ---------------------------------
|
||||
|
||||
// Field factories shared by the two publish shapes. Each call builds a fresh
// zod instance, so the image and video shapes never share schema objects.
const noteTitleField = () => z.string().min(1).describe('Note title');

const noteBodyField = () => z.string().describe('Note body text');

const hashtagsField = () =>
  z.array(z.string()).optional().describe('Hashtags to attach');

const scheduleAtField = () =>
  z
    .string()
    .optional()
    .describe('ISO 8601 datetime for scheduled publishing');

const visibilityField = () =>
  z
    .enum(['public', 'private', 'friends'])
    .optional()
    .default('public')
    .describe('Visibility setting');

/**
 * Parameter shape of `xhs_publish_image`.
 * Requires a non-empty title and at least one image; `is_original` resolves
 * to `false` and `visibility` to `'public'` when omitted.
 */
export const PublishImageSchema = {
  title: noteTitleField(),
  content: noteBodyField(),
  images: z
    .array(z.string())
    .min(1)
    .describe('Array of image file paths or URLs'),
  tags: hashtagsField(),
  schedule_at: scheduleAtField(),
  is_original: z
    .boolean()
    .optional()
    .default(false)
    .describe('Mark as original content'),
  visibility: visibilityField(),
};

/**
 * Parameter shape of `xhs_publish_video`.
 * Requires a non-empty title and a single video; `visibility` resolves to
 * `'public'` when omitted.
 */
export const PublishVideoSchema = {
  title: noteTitleField(),
  content: noteBodyField(),
  video: z.string().describe('Video file path or URL'),
  tags: hashtagsField(),
  schedule_at: scheduleAtField(),
  visibility: visibilityField(),
};
|
||||
|
||||
// -- Phase 4: Interactions (4 tools) ---------------------------------------
|
||||
|
||||
// Field factories shared by the interaction shapes. Each call builds a fresh
// zod instance.
const feedTokenField = () => z.string().describe('Security token for the feed');

/** Optional boolean that resolves to `false` when the caller omits it. */
const undoFlagField = (description: string) =>
  z.boolean().optional().default(false).describe(description);

/** Parameter shape of `xhs_post_comment`; the comment text must be non-empty. */
export const PostCommentSchema = {
  feed_id: z.string().describe('Feed ID to comment on'),
  xsec_token: feedTokenField(),
  content: z.string().min(1).describe('Comment text'),
};

/**
 * Parameter shape of `xhs_reply_comment`; the reply text must be non-empty.
 * `comment_id` and `user_id` are both optional.
 */
export const ReplyCommentSchema = {
  feed_id: z.string().describe('Feed ID'),
  xsec_token: feedTokenField(),
  comment_id: z.string().optional().describe('Comment ID to reply to'),
  user_id: z.string().optional().describe('User ID of the comment author'),
  content: z.string().min(1).describe('Reply text'),
};

/** Parameter shape of `xhs_like`; pass `unlike: true` to remove a like. */
export const LikeSchema = {
  feed_id: z.string().describe('Feed ID to like'),
  xsec_token: feedTokenField(),
  unlike: undoFlagField('Set to true to unlike'),
};

/** Parameter shape of `xhs_favorite`; pass `unfavorite: true` to remove a favorite. */
export const FavoriteSchema = {
  feed_id: z.string().describe('Feed ID to favorite'),
  xsec_token: feedTokenField(),
  unfavorite: undoFlagField('Set to true to unfavorite'),
};
|
||||
@@ -0,0 +1,387 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import { extractInitialState, parseCountString, ensureHttps } from './feeds.js';
|
||||
import type { Feed, SearchFilters } from './types.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Search results page; buildSearchUrl() appends the query string to it. */
const SEARCH_BASE_URL = 'https://www.xiaohongshu.com/search_result';
|
||||
|
||||
/** Child logger tagged with `module: 'xhs-search'`. */
const log = logger.child({ module: 'xhs-search' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Sort value mapping
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Map our public sort enum values to the URL query parameter values.
 * buildSearchUrl() writes the mapped value as the `sort` parameter.
 * NOTE(review): the numeric codes are assumed to be what the site expects — confirm.
 */
|
||||
const SORT_PARAM: Record<string, string> = {
|
||||
general: '0',
|
||||
time_descending: '1',
|
||||
popularity_descending: '2',
|
||||
};
|
||||
|
||||
/**
 * Map our note type filter values to the URL query parameter values.
 * buildSearchUrl() writes the mapped value as the `type` parameter.
 */
|
||||
const TYPE_PARAM: Record<string, string> = {
|
||||
all: '0',
|
||||
note: '1',
|
||||
video: '2',
|
||||
};
|
||||
|
||||
/**
 * Map time range filter values to URL query parameter values.
 * buildSearchUrl() writes the mapped value as the `time` parameter.
 */
|
||||
const TIME_PARAM: Record<string, string> = {
|
||||
all: '0',
|
||||
day: '1',
|
||||
week: '2',
|
||||
half_year: '3',
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// __INITIAL_STATE__ raw types for search results
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Every field below is optional, and most exist in both camelCase and
// snake_case spellings: parseRawSearchItem() tries each spelling in turn.

/** Like counters attached to a search result. */
interface RawSearchInteractInfo {
  likedCount?: string;
  liked_count?: string;
  likeCount?: string;
  like_count?: string;
}

/** Author of a search result. */
interface RawSearchUser {
  userId?: string;
  user_id?: string;
  nickname?: string;
  nick_name?: string;
  nickName?: string;
  avatar?: string;
  avatarUrl?: string;
  avatar_url?: string;
}

/** One entry of an image's `infoList` / `info_list`. */
interface RawSearchImageInfo {
  url?: string;
}

/** Cover image of a search result, with its alternative URL fields. */
interface RawSearchImage {
  url?: string;
  urlPre?: string;
  url_pre?: string;
  urlDefault?: string;
  url_default?: string;
  infoList?: RawSearchImageInfo[];
  info_list?: RawSearchImageInfo[];
}

/** Nested card that may carry the note data instead of the item itself. */
interface RawSearchNoteCard {
  noteId?: string;
  xsecToken?: string;
  xsec_token?: string;
  displayTitle?: string;
  display_title?: string;
  title?: string;
  desc?: string;
  type?: string;
  cover?: RawSearchImage;
  user?: RawSearchUser;
  interactInfo?: RawSearchInteractInfo;
  interact_info?: RawSearchInteractInfo;
}

/** A single raw search result as found in `__INITIAL_STATE__`. */
interface RawSearchFeedItem {
  id?: string;
  noteId?: string;
  note_id?: string;
  xsecToken?: string;
  xsec_token?: string;
  displayTitle?: string;
  display_title?: string;
  title?: string;
  name?: string;
  desc?: string;
  description?: string;
  type?: string;
  model_type?: string;
  noteCard?: RawSearchNoteCard;
  cover?: RawSearchImage;
  user?: RawSearchUser;
  interactInfo?: RawSearchInteractInfo;
  interact_info?: RawSearchInteractInfo;
  likedCount?: string;
  liked_count?: string;
}

/**
 * The parts of `__INITIAL_STATE__` that parseSearchFeedsFromState() probes.
 * The index signature lets it also walk top-level keys not listed here.
 */
interface SearchInitialState {
  searchNotes?: {
    feeds?: RawSearchFeedItem[];
  };
  searchResult?: {
    notes?: RawSearchFeedItem[];
    feeds?: RawSearchFeedItem[];
  };
  search?: {
    feeds?: RawSearchFeedItem[];
    notes?: RawSearchFeedItem[];
  };
  [key: string]: unknown;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// searchFeeds
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Run a keyword search on Xiaohongshu and return the matching notes.
 *
 * The search page is opened, given a fixed two seconds to render, and then
 * read in two stages: first from the page's `__INITIAL_STATE__`, and — when
 * that yields nothing — by scraping the rendered DOM.
 *
 * @param page - A Playwright Page managed by BrowserManager.
 * @param keyword - The search term.
 * @param filters - Optional sorting, type, and time range filters.
 * @returns The feeds found; the DOM stage's result is returned as-is.
 */
export async function searchFeeds(
  page: Page,
  keyword: string,
  filters?: SearchFilters,
): Promise<Feed[]> {
  const url = buildSearchUrl(keyword, filters);
  log.debug({ keyword, filters, url }, 'Navigating to search page');

  await page.goto(url, { waitUntil: 'domcontentloaded' });
  // Give the page time to render search results.
  await page.waitForTimeout(2000);

  // Stage 1: structured data embedded in the page.
  const state = (await extractInitialState(page)) as SearchInitialState | null;
  const fromState: Feed[] = state ? parseSearchFeedsFromState(state) : [];

  if (fromState.length > 0) {
    log.info(
      { keyword, count: fromState.length },
      'Extracted search results from __INITIAL_STATE__',
    );
    return fromState;
  }

  if (state) {
    log.debug('__INITIAL_STATE__ found but no search feeds extracted, falling back to DOM');
  }

  // Stage 2: scrape the rendered DOM through Playwright's Node-side API.
  log.debug('Falling back to DOM scraping for search results');
  const fromDom = await scrapeSearchResultsFromDom(page);
  log.info({ keyword, count: fromDom.length }, 'Extracted search results from DOM');
  return fromDom;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// URL construction
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Assemble the search page URL.
 *
 * `keyword` is always present in the query string. Each of `sort`, `type`
 * and `time` is added only when the filter is set and its value has an entry
 * in the corresponding code table.
 *
 * @param keyword - The search term.
 * @param filters - Optional filters to translate into query parameters.
 * @returns The absolute URL including the encoded query string.
 */
function buildSearchUrl(keyword: string, filters?: SearchFilters): string {
  const query = new URLSearchParams({ keyword });

  // [query parameter name, code table, value chosen by the caller]
  const mappings: Array<[string, Record<string, string>, string | undefined]> = [
    ['sort', SORT_PARAM, filters?.sort],
    ['type', TYPE_PARAM, filters?.type],
    ['time', TIME_PARAM, filters?.time],
  ];

  for (const [paramName, codeTable, chosen] of mappings) {
    if (!chosen) continue;
    const code = codeTable[chosen];
    if (code) {
      query.set(paramName, code);
    }
  }

  return `${SEARCH_BASE_URL}?${query.toString()}`;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// __INITIAL_STATE__ parsing for search results
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Parse search results from the __INITIAL_STATE__ data.
 *
 * The state is page-provided data that has only been cast to
 * `SearchInitialState`, so nothing about its runtime shape is guaranteed.
 * Lookup proceeds in two steps:
 *   1. The first non-nullish value among the known locations is used when it
 *      is a non-empty array.
 *   2. Otherwise every top-level object is searched for a `feeds`, `notes`
 *      or `items` array, and the first one yielding at least one feed wins.
 *
 * @param state - The extracted `__INITIAL_STATE__` object.
 * @returns The parsed feeds, or an empty array when nothing usable is found.
 */
function parseSearchFeedsFromState(state: SearchInitialState): Feed[] {
  // Drop entries that are not objects before parsing; parseRawSearchItem()
  // dereferences its argument and would throw on null or primitives.
  const toFeeds = (items: readonly unknown[]): Feed[] =>
    items
      .filter(
        (item): item is RawSearchFeedItem =>
          typeof item === 'object' && item !== null,
      )
      .map(parseRawSearchItem)
      .filter((feed): feed is Feed => feed !== null);

  // Try multiple known locations where search data may live.
  const known: unknown =
    state.searchNotes?.feeds ??
    state.searchResult?.notes ??
    state.searchResult?.feeds ??
    state.search?.feeds ??
    state.search?.notes;

  // A non-array value here (e.g. a wrapper object) must not reach `.map`;
  // it is treated like "nothing found" and handled by the walk below.
  if (Array.isArray(known) && known.length > 0) {
    return toFeeds(known);
  }

  // Walk top-level keys looking for an array that resembles feeds.
  const candidateKeys = ['feeds', 'notes', 'items'];
  for (const value of Object.values(state)) {
    if (!value || typeof value !== 'object' || Array.isArray(value)) continue;

    const container = value as Record<string, unknown>;
    for (const candidateKey of candidateKeys) {
      const list = container[candidateKey];
      if (!Array.isArray(list)) continue;

      const parsed = toFeeds(list);
      if (parsed.length > 0) return parsed;
    }
  }

  return [];
}
|
||||
|
||||
/**
 * Turn one raw search result entry into a structured Feed.
 *
 * Each field is looked up under several spellings (camelCase / snake_case),
 * first on the item itself and then on its nested `noteCard`.
 *
 * @param raw - A raw item taken from the search state.
 * @returns The Feed, or `null` when no note id could be found.
 */
function parseRawSearchItem(raw: RawSearchFeedItem): Feed | null {
  const noteCard = raw.noteCard;
  const author = raw.user ?? noteCard?.user;
  const counts =
    raw.interactInfo ?? raw.interact_info ?? noteCard?.interactInfo ?? noteCard?.interact_info;

  const noteId = raw.id ?? raw.noteId ?? raw.note_id ?? noteCard?.noteId ?? '';

  // Any type label containing "video" marks a video note; everything else is
  // treated as a normal note.
  const typeLabel = raw.type ?? raw.model_type ?? noteCard?.type ?? '';
  const isVideo = typeLabel.toLowerCase().includes('video');

  const coverUrl = extractSearchImageUrl(raw.cover ?? noteCard?.cover);

  const likeCount = parseCountString(
    counts?.likedCount ?? counts?.liked_count ??
    counts?.likeCount ?? counts?.like_count ??
    raw.likedCount ?? raw.liked_count ?? '0',
  );

  // An entry without an id cannot be addressed later, so it is dropped.
  if (!noteId) {
    return null;
  }

  return {
    id: noteId,
    xsecToken:
      raw.xsecToken ?? raw.xsec_token ?? noteCard?.xsecToken ?? noteCard?.xsec_token ?? '',
    title:
      raw.displayTitle ?? raw.display_title ?? raw.title ?? raw.name ??
      noteCard?.displayTitle ?? noteCard?.display_title ?? noteCard?.title ?? '',
    description: raw.desc ?? raw.description ?? noteCard?.desc ?? '',
    type: isVideo ? 'video' : 'normal',
    coverUrl,
    likeCount,
    user: {
      id: author?.userId ?? author?.user_id ?? '',
      nickname: author?.nickname ?? author?.nick_name ?? author?.nickName ?? '',
      avatar: author?.avatar ?? author?.avatarUrl ?? author?.avatar_url ?? '',
    },
  };
}
|
||||
|
||||
/**
 * Pick an image URL from a raw search cover object.
 *
 * The direct URL fields are tried in a fixed order, then the first entry of
 * the info list. The chosen value is passed through `ensureHttps`.
 *
 * @param raw - Raw cover object, possibly undefined.
 * @returns The selected URL, or `''` when the cover has no usable URL.
 */
function extractSearchImageUrl(raw: RawSearchImage | undefined): string {
  if (!raw) {
    return '';
  }

  // First non-empty direct field wins.
  const direct =
    raw.url || raw.urlPre || raw.urlDefault || raw.url_pre || raw.url_default;
  if (direct) {
    return ensureHttps(direct);
  }

  const firstInfoUrl = (raw.infoList ?? raw.info_list)?.[0]?.url;
  return firstInfoUrl ? ensureHttps(firstInfoUrl) : '';
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// DOM scraping fallback — uses Playwright Node-side API exclusively
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Scrape search results from the rendered page through Playwright's
 * Node-side API, so no DOM lib types are needed in this module.
 *
 * Cards without a recognisable note id in their cover link are skipped.
 * A missing sub-element yields an empty string (or `0` for the like count)
 * rather than failing the whole card.
 *
 * @param page - The Playwright page currently showing search results.
 * @returns One Feed per successfully scraped result card.
 */
async function scrapeSearchResultsFromDom(page: Page): Promise<Feed[]> {
  const noteItemSelector = '.feeds-container .note-item';

  // The token is captured from an href, where it is percent-encoded. Callers
  // encode tokens again when building URLs, so it must be decoded here to
  // avoid double-encoding. Malformed sequences fall back to the raw capture.
  const decodeQueryValue = (value: string): string => {
    try {
      return decodeURIComponent(value);
    } catch {
      return value;
    }
  };

  // Wait for the result items to appear; a timeout is tolerated and simply
  // leads to an empty result below.
  await page
    .waitForSelector(noteItemSelector, { timeout: 10_000 })
    .catch(() => null);

  const cardElements = await page.$$(noteItemSelector);
  const feeds: Feed[] = [];

  for (const card of cardElements) {
    try {
      const href = await card
        .$eval('a.cover', (el) => el.getAttribute('href') ?? '')
        .catch(() => '');

      const id = href.match(/\/explore\/([a-f0-9]+)/)?.[1] ?? '';
      if (!id) continue;

      const rawToken = href.match(/xsec_token=([^&]+)/)?.[1] ?? '';
      const xsecToken = rawToken ? decodeQueryValue(rawToken) : '';

      const coverUrl = await card
        .$eval('a.cover img', (el) => el.getAttribute('src') ?? '')
        .catch(() => '');

      const title = await card
        .$eval('.footer .title', (el) => el.textContent?.trim() ?? '')
        .catch(() => '');

      const nickname = await card
        .$eval('.footer .author-wrapper .name', (el) => el.textContent?.trim() ?? '')
        .catch(() => '');

      const avatar = await card
        .$eval('.footer .author-wrapper .author-head img', (el) => el.getAttribute('src') ?? '')
        .catch(() => '');

      const authorHref = await card
        .$eval('.footer .author-wrapper a', (el) => el.getAttribute('href') ?? '')
        .catch(() => '');
      const userId = authorHref.match(/\/user\/profile\/([a-f0-9]+)/)?.[1] ?? '';

      const likeText = await card
        .$eval('.footer .like-wrapper .count', (el) => el.textContent?.trim() ?? '0')
        .catch(() => '0');
      const likeCount = parseCountString(likeText);

      // A play icon on the card marks a video note.
      const hasVideoIcon = await card
        .$('.play-icon')
        .then((el) => el !== null)
        .catch(() => false);

      feeds.push({
        id,
        xsecToken,
        title,
        description: '',
        type: hasVideoIcon ? 'video' : 'normal',
        coverUrl,
        likeCount,
        user: { id: userId, nickname, avatar },
      });
    } catch (err: unknown) {
      // Best effort: one broken card must not abort the whole scrape, but
      // leave a trace so selector drift can be diagnosed.
      log.debug({ err }, 'Skipping search result card that could not be scraped');
    }
  }

  return feeds;
}
|
||||
@@ -0,0 +1,203 @@
|
||||
// ---------------------------------------------------------------------------
|
||||
// CSS Selectors — centralised so that UI changes only require edits here.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export const XHS_SELECTORS = {
  // Login modal / page.
  login: {
    /** Image element holding the login QR code. */
    qrCodeImage: '.login-container .qrcode-img',
    /** Sidebar channel link; present only for a logged-in user. */
    loggedInIndicator: '.user .link-wrapper .channel',
    /** Button that opens the QR code modal when it is not already shown. */
    loginButton: '.login-btn',
  },

  // Feed cards on the explore page.
  feed: {
    /** One feed card. */
    feedCard: '.note-item',
    /** Cover image inside a card. */
    coverImage: '.note-item a.cover img',
    /** Title shown in the card footer. */
    footerTitle: '.note-item .footer .title',
    /** Author name inside a card. */
    authorName: '.note-item .footer .author-wrapper .name',
    /** Author avatar inside a card. */
    authorAvatar: '.note-item .footer .author-wrapper .author-head img',
    /** Like count inside a card. */
    likeCount: '.note-item .footer .like-wrapper .count',
  },

  // Search result page.
  search: {
    /** Container wrapping the search results. */
    resultContainer: '#global-search-result-container',
    /** One search result note item. */
    noteItem: '.feeds-container .note-item',
    /** Cover image of a result. */
    coverImage: '.feeds-container .note-item a.cover img',
    /** Title of a result. */
    title: '.feeds-container .note-item .footer .title',
    /** Author name of a result. */
    authorName: '.feeds-container .note-item .footer .author-wrapper .name',
    /** Author avatar of a result. */
    authorAvatar: '.feeds-container .note-item .footer .author-wrapper .author-head img',
    /** Like count of a result. */
    likeCount: '.feeds-container .note-item .footer .like-wrapper .count',
  },

  // Note detail page.
  feedDetail: {
    /** Main content container of the note. */
    noteContainer: '#noteContainer',
    /** Note title. */
    title: '#detail-title',
    /** Note description / body. */
    description: '#detail-desc',
    /** Images of an image note. */
    images: '.note-image-list .note-image img',
    /** Single hero image, used by some notes instead of a list. */
    heroImage: '.note-hero img',
    /** Video player element. */
    video: '#videoplayer video',
    /** Source element of the video player. */
    videoSource: '#videoplayer video source',
    /** Tag links inside the note body. */
    tags: '#detail-desc a.tag',
    /** Like count. */
    likeCount: '.engage-bar .like-wrapper .count',
    /** Collect (favorite) count. */
    collectCount: '.engage-bar .collect-wrapper .count',
    /** Comment count. */
    commentCount: '.engage-bar .chat-wrapper .count',
    /** Share count. */
    shareCount: '.engage-bar .share-wrapper .count',
    /** Publish / creation time text. */
    createTime: '.note-scroller .bottom-container .date',
    /** IP location text. */
    ipLocation: '.note-scroller .bottom-container .ip-location',
    /** Author nickname. */
    authorName: '.author-container .info .name',
    /** Author avatar. */
    authorAvatar: '.author-container .info .avatar img',
    /** Link carrying the author's user id. */
    authorLink: '.author-container .info a',
    /** Container of the comment list. */
    commentListContainer: '.comments-container .list-container',
    /** One top-level comment. */
    commentItem: '.comments-container .list-container .list-item',
    /** Text content of a comment. */
    commentContent: '.content',
    /** Comment author name. */
    commentAuthor: '.author .name',
    /** Comment author avatar. */
    commentAvatar: '.author .avatar img',
    /** Comment like count. */
    commentLikeCount: '.like .count',
    /** Comment publish time. */
    commentTime: '.date',
    /** Comment IP location. */
    commentIpLocation: '.ip-location',
    /** One sub-comment (reply). */
    subCommentItem: '.sub-comment-list .sub-comment-item',
    /** "Show more comments" button. */
    showMoreComments: '.comments-container .show-more',
    /** "Load more replies" button inside a comment thread. */
    loadMoreReplies: '.sub-comment-list .show-more',
  },

  // User profile page.
  userProfile: {
    /** Profile header container. */
    headerContainer: '.user-info',
    /** User nickname. */
    nickname: '.user-info .user-name',
    /** User avatar image. */
    avatar: '.user-info .user-image img',
    /** User bio / description text. */
    description: '.user-info .user-desc',
    /** Gender icon or text. */
    gender: '.user-info .gender-icon',
    /** IP location. */
    ipLocation: '.user-info .user-ip',
    /** Follower / following / interaction count elements. */
    followCount: '.user-info .data-area .data-item',
    /** Tab elements; one of them displays the note count. */
    noteCountTab: '.reds-tab-item',
    /** One feed item on the profile. */
    feedItem: '.feeds-container .note-item',
  },

  // -- Phase 4: Publish -----------------------------------------------------
  publish: {
    /** File input used to upload images on the creator publish page. */
    imageFileInput: 'input[type="file"]',
    /** Title input of the publish form. */
    titleInput: '#note-title',
    /** Contenteditable body editor of the publish form. */
    contentEditor: '#note-content',
    /** Button that opens the topic input. */
    tagButton: '#topicBtn',
    /** Input for typing hashtags / topics. */
    tagInput: '#topicBtn input',
    /** One item of the topic suggestion dropdown. */
    tagSuggestionItem: '.publish-topic-item, .topic-item',
    /** Publish / submit button. */
    publishButton: '.publishBtn',
    /** Button that opens the scheduling selector. */
    scheduleButton: '.timing-btn, button:has-text("定时")',
    /** Date/time input for scheduled publishing. */
    scheduleInput: '.timing-input input, .schedule-input input',
    /** Checkbox declaring the content as original. */
    originalCheckbox: '.original-checkbox input, input[type="checkbox"][name="original"]',
    /** Button that opens the visibility / permission setting. */
    visibilityButton: '.permission-btn, button:has-text("可见")',
    /** Visibility option: public. */
    visibilityPublic: '.permission-option:has-text("公开"), .visibility-option:has-text("公开")',
    /** Visibility option: private. */
    visibilityPrivate: '.permission-option:has-text("私密"), .visibility-option:has-text("私密")',
    /** Visibility option: friends only. */
    visibilityFriends: '.permission-option:has-text("好友"), .visibility-option:has-text("好友")',
    /** Thumbnail visible once an image upload has completed. */
    uploadedImageItem: '.upload-item img, .img-item img, .image-item img',
    /** Video element visible once a video upload has completed. */
    uploadedVideoItem: '.upload-video video, .video-item video, .video-container video',
    /** Indicator shown after publishing completes. */
    publishSuccess: '.success-panel, .publish-success, .note-success',
    /** Address-bar URL pattern after a successful publish (fallback check). */
    publishSuccessUrlPattern: /\/publish\/success/,
  },

  // -- Phase 4: Comment / Reply ---------------------------------------------
  comment: {
    /** Comment input field / textarea on the feed detail page. */
    commentInput: '#content-textarea',
    /** Alternative comment input (contenteditable div). */
    commentInputAlt: '[contenteditable][data-placeholder]',
    /** Comment submit / send button. */
    commentSubmitButton: '.comment-submit, button.submit, .btn-send',
    /** Comment element, used to locate a specific comment by id. */
    commentItem: '.comment-item, .note-comment-item, [id^="comment-"]',
    /** Reply button on an individual comment. */
    commentReplyButton: '.reply-btn, .comment-reply',
    /** Reply input that appears after clicking reply. */
    replyInput: '.reply-input textarea, .reply-content [contenteditable], .reply-area textarea',
  },

  // -- Phase 4: Interaction (Like / Favorite) --------------------------------
  interaction: {
    /** Like button on the feed detail page. */
    likeButton: '.engage-bar .like-wrapper, span.like-wrapper',
    /** Like button in its active (liked) state. */
    likeButtonActive: '.engage-bar .like-wrapper.active, span.like-wrapper.active',
    /** Like count next to the like button. */
    likeCount: '.engage-bar .like-wrapper .count',
    /** Favorite / collect button on the feed detail page. */
    favoriteButton: '.engage-bar .collect-wrapper, span.collect-wrapper',
    /** Favorite button in its active (favorited) state. */
    favoriteButtonActive: '.engage-bar .collect-wrapper.active, span.collect-wrapper.active',
    /** Favorite count next to the favorite button. */
    favoriteCount: '.engage-bar .collect-wrapper .count',
    /** Interaction bar at the bottom of a feed detail. */
    interactionBar: '.interact-container, .engage-bar',
  },
} as const;
|
||||
@@ -0,0 +1,98 @@
|
||||
// ---------------------------------------------------------------------------
|
||||
// Xiaohongshu domain types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// -- Login -----------------------------------------------------------------
|
||||
|
||||
/** Outcome of a login-state check. */
export interface LoginStatus {
  /** `true` when a logged-in session was detected. */
  loggedIn: boolean;
  /** Account name, when one is available. */
  username?: string;
}
|
||||
|
||||
/** Result of requesting a login QR code. */
export interface QRCodeResult {
  /** The QR code image as a Base64 data URI. */
  qrcodeData: string;
  /** `true` when the user was already logged in, so no QR code is needed. */
  alreadyLoggedIn: boolean;
  /** Human-readable timeout hint, e.g. "4m". */
  timeout: string;
}
|
||||
|
||||
// -- Feed -----------------------------------------------------------------
|
||||
|
||||
/** Author summary attached to a feed or feed detail. */
export interface FeedUser {
  /** User id; may be empty when it could not be extracted. */
  id: string;
  /** Display name. */
  nickname: string;
  /** Avatar image URL. */
  avatar: string;
}
|
||||
|
||||
/** A note as it appears in a listing (explore feed, search results, profile). */
export interface Feed {
  /** Note id. */
  id: string;
  /** The `xsec_token` value associated with the note; may be empty. */
  xsecToken: string;
  /** Display title. */
  title: string;
  /** Description text; listings scraped from the DOM leave this empty. */
  description: string;
  /** Whether the note is a video or a normal note. */
  type: 'normal' | 'video';
  /** Cover image URL. */
  coverUrl: string;
  /** Number of likes. */
  likeCount: number;
  /** Author of the note. */
  user: FeedUser;
}
|
||||
|
||||
// -- Feed Detail ----------------------------------------------------------
|
||||
|
||||
/** Full content of a single note, including engagement counts and comments. */
export interface FeedDetail {
  /** Note id. */
  id: string;
  /** The `xsec_token` value associated with the note. */
  xsecToken: string;
  /** Note title. */
  title: string;
  /** Note description / body text. */
  description: string;
  /** Whether the note is a video or a normal note. */
  type: 'normal' | 'video';
  /** Image URLs of the note. */
  images: string[];
  /** Video URL, when one is available. */
  videoUrl?: string;
  /** Tags attached to the note. */
  tags: string[];
  /** Number of likes. */
  likeCount: number;
  /** Number of collects (favorites). */
  collectCount: number;
  /** Number of comments. */
  commentCount: number;
  /** Number of shares. */
  shareCount: number;
  /** Creation time, as a string. */
  createTime: string;
  /** Last update time, as a string. */
  lastUpdateTime: string;
  /** IP location text. */
  ipLocation: string;
  /** Author of the note. */
  user: FeedUser;
  /** Comments on the note. */
  comments: Comment[];
}
|
||||
|
||||
// -- Comment --------------------------------------------------------------
|
||||
|
||||
/** A comment on a note; replies are nested in `subComments`. */
export interface Comment {
  /** Comment id. */
  id: string;
  /** Id of the comment author. */
  userId: string;
  /** Display name of the comment author. */
  nickname: string;
  /** Avatar URL of the comment author. */
  avatar: string;
  /** Comment text. */
  content: string;
  /** Number of likes on the comment. */
  likeCount: number;
  /** Publish time, as a string. */
  createTime: string;
  /** IP location text. */
  ipLocation: string;
  /** Replies to this comment. */
  subComments: Comment[];
}
|
||||
|
||||
// -- User Profile ---------------------------------------------------------
|
||||
|
||||
/** A user's profile together with the notes listed on their profile page. */
export interface UserProfile {
  /** User id. */
  id: string;
  /** Display name. */
  nickname: string;
  /** Avatar image URL. */
  avatar: string;
  /** Bio / description text. */
  description: string;
  /** `'male'`, `'female'`, or `''` when not determined. */
  gender: string;
  /** IP location text. */
  ipLocation: string;
  /** Number of accounts the user follows. */
  follows: number;
  /** Number of followers. */
  fans: number;
  /** Interaction count shown on the profile. */
  interaction: number;
  /** Number of notes reported for the user. */
  feedCount: number;
  /** Notes extracted from the profile page. */
  feeds: Feed[];
}
|
||||
|
||||
// -- Search Filters -------------------------------------------------------
|
||||
|
||||
/** Optional filters applied to a keyword search. */
export interface SearchFilters {
  /** Result ordering. */
  sort?: 'general' | 'time_descending' | 'popularity_descending';
  /** Restrict results by note type. */
  type?: 'all' | 'note' | 'video';
  /** Restrict results by publish time range. */
  time?: 'all' | 'day' | 'week' | 'half_year';
}
|
||||
@@ -0,0 +1,442 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import { XHS_SELECTORS } from './selectors.js';
|
||||
import { extractInitialState, parseCountString, ensureHttps } from './feeds.js';
|
||||
import type { UserProfile, Feed } from './types.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** URL prefix of a user profile page; the user id is appended as a path segment. */
const USER_PROFILE_BASE_URL = 'https://www.xiaohongshu.com/user/profile';

/** Shorthand for the user-profile selector group. */
const SEL = XHS_SELECTORS.userProfile;

/** Logger scoped to this module. */
const log = logger.child({ module: 'xhs-user-profile' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// __INITIAL_STATE__ raw types for user profile
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Subset of `__INITIAL_STATE__` read when extracting a user profile. Profile
 * data may sit under `user` or under `userProfile`; unknown keys are allowed.
 */
interface RawProfileState {
  user?: {
    userPageData?: RawUserPageData;
    userInfo?: RawUserInfo;
  };
  userProfile?: {
    userInfo?: RawUserInfo;
    notes?: RawProfileNote[];
  };
  [key: string]: unknown;
}
|
||||
|
||||
/** Raw `userPageData` object: basic info, counters and the profile's notes. */
interface RawUserPageData {
  basicInfo?: RawUserInfo;
  interactions?: RawInteractions;
  notes?: RawProfileNote[];
  /** Note count, in camelCase or snake_case spelling. */
  noteCount?: number | string;
  note_count?: number | string;
}
|
||||
|
||||
/**
 * Raw user info object. Every field is optional and most exist under several
 * spellings (camelCase / snake_case); the parser probes them in turn.
 */
interface RawUserInfo {
  // Identity
  userId?: string;
  user_id?: string;
  nickname?: string;
  nick_name?: string;
  nickName?: string;

  // Avatar (`images` is used as a last-resort avatar source by the parser)
  avatar?: string;
  avatarUrl?: string;
  avatar_url?: string;
  images?: string;

  // Bio
  desc?: string;
  description?: string;
  gender?: number | string;
  ipLocation?: string;
  ip_location?: string;
  fstatus?: string;

  // Counters; numeric or string-encoded
  follows?: number | string;
  fans?: number | string;
  interaction?: number | string;
  noteCount?: number | string;
  note_count?: number | string;
}
|
||||
|
||||
/** Raw follower / following / interaction counters; numeric or string-encoded. */
interface RawInteractions {
  follows?: string | number;
  fans?: string | number;
  interaction?: string | number;
}
|
||||
|
||||
/** Like-count fields as found on a raw note's interaction info object. */
interface RawProfileNoteLikeCounts {
  likedCount?: string;
  liked_count?: string;
  likeCount?: string;
  like_count?: string;
}

/**
 * Raw note entry from the profile state. Fields are optional and may appear
 * in camelCase or snake_case spelling.
 */
interface RawProfileNote {
  // Identity
  id?: string;
  noteId?: string;
  note_id?: string;
  xsecToken?: string;
  xsec_token?: string;

  // Text
  displayTitle?: string;
  display_title?: string;
  title?: string;
  desc?: string;
  type?: string;

  // Cover image: direct URL variants plus an info list of alternatives
  cover?: {
    url?: string;
    urlPre?: string;
    url_pre?: string;
    urlDefault?: string;
    url_default?: string;
    infoList?: Array<{ url?: string }>;
    info_list?: Array<{ url?: string }>;
  };

  // Author
  user?: {
    userId?: string;
    user_id?: string;
    nickname?: string;
    nick_name?: string;
    avatar?: string;
  };

  // Like counts, nested or directly on the note
  interactInfo?: RawProfileNoteLikeCounts;
  interact_info?: RawProfileNoteLikeCounts;
  likedCount?: string;
  liked_count?: string;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// getUserProfile
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Navigate to a Xiaohongshu user profile page and extract the user's
 * information: basic info, follower/following counts and the listed notes.
 *
 * Extraction first tries the page's `__INITIAL_STATE__` data and falls back
 * to scraping the rendered DOM when that yields no profile.
 *
 * @param page - A Playwright Page managed by BrowserManager.
 * @param userId - The user ID; percent-encoded before being placed in the URL path.
 * @param xsecToken - Security token required to access the profile page.
 * @returns A UserProfile object with the user's data.
 */
export async function getUserProfile(
  page: Page,
  userId: string,
  xsecToken: string,
): Promise<UserProfile> {
  // Both caller-supplied values are encoded so that characters such as `/`,
  // `?` or `#` cannot alter the path or query of the request.
  const url =
    `${USER_PROFILE_BASE_URL}/${encodeURIComponent(userId)}` +
    `?xsec_token=${encodeURIComponent(xsecToken)}&xsec_source=pc_note`;
  log.debug({ userId, url }, 'Navigating to user profile page');

  await page.goto(url, { waitUntil: 'domcontentloaded' });

  // Wait for the user profile header to appear; a timeout is only logged so
  // that the state-based strategy still gets a chance.
  await page
    .waitForSelector(SEL.headerContainer, { timeout: 15_000 })
    .catch(() => {
      log.warn({ userId }, 'User profile header not found within timeout, proceeding');
    });

  // Allow render to settle.
  await page.waitForTimeout(1500);

  // -----------------------------------------------------------------------
  // Strategy 1: Extract from __INITIAL_STATE__
  // -----------------------------------------------------------------------
  const initialState = await extractInitialState(page) as RawProfileState | null;

  if (initialState) {
    const stateProfile = parseProfileFromState(initialState, userId, xsecToken);
    if (stateProfile) {
      log.info(
        { userId, feedCount: stateProfile.feeds.length },
        'Extracted user profile from __INITIAL_STATE__',
      );
      return stateProfile;
    }
    log.debug('__INITIAL_STATE__ found but no profile data extracted, falling back to DOM');
  }

  // -----------------------------------------------------------------------
  // Strategy 2: Fall back to DOM scraping
  // -----------------------------------------------------------------------
  log.debug({ userId }, 'Falling back to DOM scraping for user profile');
  const domProfile = await scrapeProfileFromDom(page, userId, xsecToken);
  log.info({ userId, feedCount: domProfile.feeds.length }, 'Extracted user profile from DOM');
  return domProfile;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// __INITIAL_STATE__ parsing
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Build a UserProfile from the `__INITIAL_STATE__` data.
 *
 * @param state - The extracted initial state.
 * @param userId - Requested user id; used when the state carries no id and as
 *   the owner id passed on for the profile's notes.
 * @param _xsecToken - Currently unused.
 * @returns The profile, or `null` when no user info object is present in any
 *   of the known locations.
 */
function parseProfileFromState(
  state: RawProfileState,
  userId: string,
  _xsecToken: string,
): UserProfile | null {
  // User info may live in any of three known places.
  const pageData = state.user?.userPageData;
  const info =
    pageData?.basicInfo ?? state.user?.userInfo ?? state.userProfile?.userInfo;
  if (!info) return null;

  // Gender code: 1 -> male, 2 -> female, anything else -> ''.
  const rawGender = info.gender;
  const gender =
    rawGender === 1 || rawGender === '1'
      ? 'male'
      : rawGender === 2 || rawGender === '2'
        ? 'female'
        : '';

  // Counters: the dedicated interactions object takes precedence over the
  // same-named fields on the user info.
  const counters = pageData?.interactions;
  const follows = toNumber(counters?.follows ?? info.follows ?? 0);
  const fans = toNumber(counters?.fans ?? info.fans ?? 0);
  const interaction = toNumber(counters?.interaction ?? info.interaction ?? 0);

  const feedCount = toNumber(
    pageData?.noteCount ?? pageData?.note_count ?? info.noteCount ?? info.note_count ?? 0,
  );

  // Notes listed on the profile page; entries without an id are dropped.
  const noteList: RawProfileNote[] = pageData?.notes ?? state.userProfile?.notes ?? [];
  const feeds: Feed[] = [];
  for (const note of noteList) {
    const feed = parseProfileNote(note, userId);
    if (feed !== null) feeds.push(feed);
  }

  const rawAvatar = info.avatar ?? info.avatarUrl ?? info.avatar_url ?? info.images ?? '';

  return {
    id: info.userId ?? info.user_id ?? userId,
    nickname: info.nickname ?? info.nick_name ?? info.nickName ?? '',
    avatar: rawAvatar ? ensureHttps(rawAvatar) : '',
    description: info.desc ?? info.description ?? '',
    gender,
    ipLocation: info.ipLocation ?? info.ip_location ?? '',
    follows,
    fans,
    interaction,
    feedCount,
    feeds,
  };
}
|
||||
|
||||
/**
 * Parse a note from the user profile state into a Feed object.
 *
 * @param raw - Raw note entry from the profile state.
 * @param ownerUserId - User id to attribute the note to when the note itself
 *   carries no author id.
 * @returns The Feed, or `null` when the note has no id.
 */
function parseProfileNote(
  raw: RawProfileNote,
  ownerUserId: string,
): Feed | null {
  const id = raw.id ?? raw.noteId ?? raw.note_id ?? '';
  if (!id) return null;

  const rawType = raw.type ?? '';
  const type: 'normal' | 'video' =
    rawType.toLowerCase().includes('video') ? 'video' : 'normal';

  // Cover image: take the first NON-EMPTY variant. `||` is deliberate here:
  // with `??` an empty-string `url` would mask the populated variants that
  // follow it.
  let coverUrl = '';
  const cover = raw.cover;
  if (cover) {
    const candidate =
      cover.url || cover.urlPre || cover.url_pre ||
      cover.urlDefault || cover.url_default ||
      (cover.infoList ?? cover.info_list)?.[0]?.url ||
      '';
    if (candidate) coverUrl = ensureHttps(candidate);
  }

  // Like count: nested interaction info first, then fields on the note itself.
  const interact = raw.interactInfo ?? raw.interact_info;
  const likeCount = parseCountString(
    interact?.likedCount ?? interact?.liked_count ??
    interact?.likeCount ?? interact?.like_count ??
    raw.likedCount ?? raw.liked_count ?? '0',
  );

  const rawUser = raw.user;

  return {
    id,
    xsecToken: raw.xsecToken ?? raw.xsec_token ?? '',
    title: raw.displayTitle ?? raw.display_title ?? raw.title ?? '',
    description: raw.desc ?? '',
    type,
    coverUrl,
    likeCount,
    user: {
      id: rawUser?.userId ?? rawUser?.user_id ?? ownerUserId,
      nickname: rawUser?.nickname ?? rawUser?.nick_name ?? '',
      avatar: rawUser?.avatar ?? '',
    },
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// DOM scraping fallback — uses Playwright Node-side API exclusively
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Scrape user profile data from the rendered DOM using Playwright's
 * Node-side APIs to avoid needing DOM lib types.
 *
 * Every field is best effort: a missing element yields an empty string or
 * zero instead of failing the whole scrape.
 *
 * @param page - The Playwright page currently showing the profile.
 * @param userId - Id of the profile owner; used as the profile id and as the
 *   author id of every scraped note.
 * @param xsecToken - Token used to open the profile; reused for notes whose
 *   link carries no token of its own.
 * @returns The scraped profile.
 */
async function scrapeProfileFromDom(
  page: Page,
  userId: string,
  xsecToken: string,
): Promise<UserProfile> {
  // Tokens captured from an href are percent-encoded. Callers encode tokens
  // again when building URLs, so decode here to avoid double-encoding.
  // Malformed sequences fall back to the raw capture.
  const decodeQueryValue = (value: string): string => {
    try {
      return decodeURIComponent(value);
    } catch {
      return value;
    }
  };

  // Nickname
  const nickname = await page
    .$eval(SEL.nickname, (el) => el.textContent?.trim() ?? '')
    .catch(() => '');

  // Avatar
  const avatar = await page
    .$eval(SEL.avatar, (img) => img.getAttribute('src') ?? '')
    .catch(() => '');

  // Description / bio
  const description = await page
    .$eval(SEL.description, (el) => el.textContent?.trim() ?? '')
    .catch(() => '');

  // Gender — derived from the icon's class attribute. The attribute is read
  // rather than `className`, because `className` is not a plain string on
  // SVG elements. "female" is tested first since it contains "male".
  const gender = await page
    .$eval(SEL.gender, (el) => {
      const cls = (el.getAttribute('class') ?? '').toLowerCase();
      if (cls.includes('female')) return 'female';
      if (cls.includes('male')) return 'male';
      return '';
    })
    .catch(() => '');

  // IP location
  const ipLocation = await page
    .$eval(SEL.ipLocation, (el) => el.textContent?.trim() ?? '')
    .catch(() => '');

  // Follower / following / interaction counts, read positionally from the
  // matched data items: [0] follows, [1] fans, [2] interaction.
  const dataCounts = await page
    .$$eval(SEL.followCount, (items) =>
      items.map((item) => item.querySelector('.count')?.textContent?.trim() ?? '0'),
    )
    .catch(() => [] as string[]);

  const follows = parseCountString(dataCounts[0] ?? '0');
  const fans = parseCountString(dataCounts[1] ?? '0');
  const interaction = parseCountString(dataCounts[2] ?? '0');

  // Note count: the first integer in the tab whose label contains "笔记"
  // (written as \u escapes); evaluated in the browser context.
  const feedCount = await page
    .$$eval(SEL.noteCountTab, (tabs) => {
      for (const tab of tabs) {
        const text = tab.textContent ?? '';
        if (text.includes('\u7B14\u8BB0')) {
          const match = text.match(/\d+/);
          return match ? parseInt(match[0], 10) : 0;
        }
      }
      return 0;
    })
    .catch(() => 0);

  // Scrape feed items on the profile page.
  const feedElements = await page.$$(SEL.feedItem);
  const feeds: Feed[] = [];

  for (const card of feedElements) {
    try {
      const href = await card
        .$eval('a.cover', (el) => el.getAttribute('href') ?? '')
        .catch(() => '');

      const id = href.match(/\/explore\/([a-f0-9]+)/)?.[1] ?? '';
      if (!id) continue;

      const rawToken = href.match(/xsec_token=([^&]+)/)?.[1] ?? '';
      const noteXsecToken = rawToken ? decodeQueryValue(rawToken) : '';

      const coverUrl = await card
        .$eval('a.cover img', (el) => el.getAttribute('src') ?? el.getAttribute('data-src') ?? '')
        .catch(() => '');

      const feedTitle = await card
        .$eval('.footer .title', (el) => el.textContent?.trim() ?? '')
        .catch(() => '');

      const likeText = await card
        .$eval('.footer .like-wrapper .count', (el) => el.textContent?.trim() ?? '0')
        .catch(() => '0');

      // A play icon on the card marks a video note.
      const hasVideoIcon = await card
        .$('.play-icon')
        .then((el) => el !== null)
        .catch(() => false);

      feeds.push({
        id,
        xsecToken: noteXsecToken || xsecToken,
        title: feedTitle,
        description: '',
        type: hasVideoIcon ? 'video' : 'normal',
        coverUrl,
        likeCount: parseCountString(likeText),
        user: { id: userId, nickname: '', avatar: '' },
      });
    } catch (err: unknown) {
      // Best effort: one broken card must not abort the whole scrape, but
      // leave a trace so selector drift can be diagnosed.
      log.debug({ err, userId }, 'Skipping profile feed card that could not be scraped');
    }
  }

  return {
    id: userId,
    nickname,
    avatar,
    description,
    gender,
    ipLocation,
    follows,
    fans,
    interaction,
    feedCount,
    feeds,
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Normalise a count that may arrive either as a number or as a display string.
 *
 * @param val - A numeric value, or a string to be parsed.
 * @returns `val` unchanged when it is already a number; otherwise the result
 *          of `parseCountString(val)`.
 */
function toNumber(val: string | number): number {
  return typeof val === 'number' ? val : parseCountString(val);
}
|
||||
@@ -0,0 +1,342 @@
|
||||
import http from 'node:http';
|
||||
|
||||
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
||||
import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js';
|
||||
import express from 'express';
|
||||
|
||||
import { config } from '../config/index.js';
|
||||
import { BrowserManager, browserManager } from '../browser/manager.js';
|
||||
import { logger } from '../utils/logger.js';
|
||||
import {
|
||||
dnsRebindingGuard,
|
||||
shutdownGuard,
|
||||
errorHandler,
|
||||
bearerAuth,
|
||||
initBearerToken,
|
||||
} from './middleware.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Package version reported by the /health endpoint and in the MCP server
// info. It is a hard-coded string (keep it in sync with the package
// manifest) so that we avoid importing JSON, which would require
// `resolveJsonModule` + ESM assertion gymnastics.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const PACKAGE_VERSION = '0.1.0';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// PlatformPlugin interface
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Adapter contract for a single social platform.
 *
 * Only `name` and `registerTools` are mandatory. The remaining hooks are
 * optional; when present they are invoked by `AppServer`: `registerRoutes`
 * while the plugin is being registered, `init` on start-up, `shutdown` during
 * graceful shutdown, and `healthCheck` whenever `/health` is requested.
 */
export interface PlatformPlugin {
  /** Display name; appears in log records and as the key in health output. */
  name: string;

  /**
   * Add this platform's MCP tools to the shared server.
   *
   * @param server  - The MCP server all plugins register on.
   * @param browser - The shared browser manager.
   */
  registerTools(server: McpServer, browser: BrowserManager): void;

  /**
   * Optional: attach REST routes to a router supplied by the application.
   *
   * @param router  - A fresh Express router that the application mounts.
   * @param browser - The shared browser manager.
   */
  registerRoutes?(router: express.Router, browser: BrowserManager): void;

  /** Optional: asynchronous start-up work, awaited before the server listens. */
  init?(): Promise<void>;

  /** Optional: asynchronous teardown, awaited during graceful shutdown. */
  shutdown?(): Promise<void>;

  /**
   * Optional: report whether the platform is currently healthy, with an
   * optional human-readable explanation.
   */
  healthCheck?(): Promise<{ healthy: boolean; message?: string }>;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// AppServer
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * HTTP front-end of the service: a single Express application that exposes
 * the MCP server over SSE (`GET /sse` + `POST /messages`), a `/health`
 * endpoint, and the Bearer-protected `/api/*` routes contributed by plugins.
 *
 * Intended lifecycle: construct, call `registerPlugin()` for each platform,
 * `start()`, and eventually `close()` for a graceful shutdown.
 */
export class AppServer {
  // -- Public surface -------------------------------------------------------

  /** The underlying Express application -- use for plugin route mounting. */
  readonly app: express.Express;

  /** The MCP server instance -- use for plugin tool registration. */
  readonly mcpServer: McpServer;

  // -- Internal state -------------------------------------------------------

  /** Listening HTTP server; `null` before `start()` and after `close()`. */
  private httpServer: http.Server | null = null;

  /** Set by `close()`; read by the shutdown guard and the health report. */
  private shuttingDown = false;

  /** Registered plugins, in registration order. */
  private readonly plugins: PlatformPlugin[] = [];

  /**
   * SSE transports keyed by session ID so that POST /messages can route
   * incoming JSON-RPC messages to the correct transport instance.
   */
  private readonly transports = new Map<string, SSEServerTransport>();

  // -- Constructor ----------------------------------------------------------

  /**
   * Wire up the middleware chain. Registration order matters: body parsing,
   * then the DNS-rebinding and shutdown guards, then the SSE and health
   * endpoints, then Bearer auth for `/api`, and finally the error handler.
   */
  constructor() {
    // 1. Express app + body parsing
    this.app = express();
    this.app.use(express.json());

    // 2. Security & availability middleware
    this.app.use(dnsRebindingGuard);
    this.app.use(shutdownGuard(() => this.shuttingDown));

    // 3. MCP server
    this.mcpServer = new McpServer({ name: 'social-mcp', version: PACKAGE_VERSION });

    // 4. SSE transport endpoints
    this.setupSseEndpoints();

    // 5. Health endpoint
    this.setupHealthEndpoint();

    // 6. Bearer token auth for /api/* routes
    initBearerToken();
    this.app.use('/api', bearerAuth);

    // 7. Error handler (registered again in start(), after plugin routes)
    this.app.use(errorHandler);
  }

  // -- Plugin registration --------------------------------------------------

  /**
   * Register a platform plugin. Call this **before** `start()` so that all
   * tools and routes are wired up before the server begins accepting
   * connections.
   *
   * @param plugin - The platform adapter to register.
   */
  registerPlugin(plugin: PlatformPlugin): void {
    logger.info({ plugin: plugin.name }, 'Registering platform plugin');

    plugin.registerTools(this.mcpServer, browserManager);

    if (plugin.registerRoutes) {
      const router = express.Router();
      plugin.registerRoutes(router, browserManager);
      // NOTE(review): the mount point is fixed to /api/xhs regardless of the
      // plugin, so a second plugin with routes would share this prefix.
      this.app.use(`/api/xhs`, router);
    }

    this.plugins.push(plugin);
  }

  // -- Lifecycle ------------------------------------------------------------

  /**
   * Initialise all plugins and start listening for HTTP connections on
   * `config.host:config.port`.
   *
   * @returns A promise that resolves once the server is listening and rejects
   *          if a plugin's `init` throws or the server emits `error`.
   */
  async start(): Promise<void> {
    // Sequential on purpose so that initialisation order is deterministic.
    for (const plugin of this.plugins) {
      if (plugin.init) {
        logger.info({ plugin: plugin.name }, 'Initialising plugin');
        await plugin.init();
      }
    }

    // Re-register the error handler so it sits after any plugin routes.
    this.app.use(errorHandler);

    return new Promise<void>((resolve, reject) => {
      this.httpServer = this.app
        .listen(config.port, config.host, () => {
          logger.info({ host: config.host, port: config.port }, 'AppServer listening');
          resolve();
        })
        .on('error', (err: Error) => {
          reject(err);
        });
    });
  }

  /**
   * Initiate graceful shutdown:
   *   1. Set the shutting-down flag (new requests get 503).
   *   2. Shut down every plugin.
   *   3. Close all SSE transports and the MCP server.
   *   4. Close the HTTP server.
   *
   * Failures in steps 2 and 3 are logged and do not abort the sequence.
   * Calling `close()` again after it has started is a no-op.
   */
  async close(): Promise<void> {
    if (this.shuttingDown) return;
    this.shuttingDown = true;

    logger.info('AppServer shutting down');

    // Shut down plugins
    for (const plugin of this.plugins) {
      if (!plugin.shutdown) continue;
      try {
        await plugin.shutdown();
      } catch (err: unknown) {
        logger.warn({ err, plugin: plugin.name }, 'Error shutting down plugin');
      }
    }

    // Close all SSE transports
    for (const [sessionId, transport] of this.transports) {
      try {
        await transport.close();
      } catch (err: unknown) {
        logger.warn({ err, sessionId }, 'Error closing SSE transport');
      }
    }
    this.transports.clear();

    // Close the MCP server
    try {
      await this.mcpServer.close();
    } catch (err: unknown) {
      logger.warn({ err }, 'Error closing MCP server');
    }

    // Close the HTTP server
    const server = this.httpServer;
    if (server) {
      await new Promise<void>((resolve) => {
        server.close(() => {
          resolve();
        });
      });
      this.httpServer = null;
    }

    logger.info('AppServer shut down complete');
  }

  // -- Private: SSE endpoints -----------------------------------------------

  /** Mount `GET /sse` (open a session) and `POST /messages` (send to it). */
  private setupSseEndpoints(): void {
    // GET /sse -- establish a new SSE connection
    this.app.get('/sse', (req, res) => {
      logger.debug({ ip: req.ip }, 'New SSE connection request');

      const transport = new SSEServerTransport('/messages', res);
      const sessionId = transport.sessionId;

      this.transports.set(sessionId, transport);

      logger.info({ sessionId }, 'SSE transport created');

      // Clean up when the client disconnects.
      res.on('close', () => {
        logger.info({ sessionId }, 'SSE client disconnected');
        this.transports.delete(sessionId);
      });

      // Connect the transport to the MCP server.  This starts the SSE
      // stream and sends the initial endpoint event to the client.
      // NOTE(review): one McpServer instance is connected to every session's
      // transport -- confirm the SDK supports concurrent connections.
      void this.mcpServer.connect(transport).catch((err: unknown) => {
        logger.error({ err, sessionId }, 'Failed to connect SSE transport to MCP server');
        this.transports.delete(sessionId);

        // Do not leave the client hanging on a session that never started.
        if (!res.headersSent) {
          res.status(500).json({ error: 'Internal server error' });
        } else if (!res.writableEnded) {
          res.end();
        }
      });
    });

    // POST /messages -- receive JSON-RPC messages for an existing session
    this.app.post('/messages', (req, res) => {
      // A query value can also be an array or a nested object; only a plain
      // string is a usable session ID.
      const rawSessionId: unknown = req.query['sessionId'];
      const sessionId = typeof rawSessionId === 'string' ? rawSessionId : undefined;

      if (!sessionId) {
        res.status(400).json({ error: 'Missing sessionId query parameter' });
        return;
      }

      const transport = this.transports.get(sessionId);

      if (!transport) {
        res.status(404).json({ error: 'Unknown or expired session' });
        return;
      }

      // express.json() has already consumed and parsed the request stream, so
      // hand the parsed body to the transport instead of letting it try to
      // read the (now drained) stream again.
      const parsedBody: unknown = req.body;
      void transport.handlePostMessage(req, res, parsedBody).catch((err: unknown) => {
        logger.error({ err, sessionId }, 'Error handling POST /messages');
        if (!res.headersSent) {
          res.status(500).json({ error: 'Internal server error' });
        }
      });
    });
  }

  // -- Private: Health endpoint ---------------------------------------------

  /**
   * Mount `GET /health`. Responds 200 when the report says `healthy`, 503
   * when it does not, and 500 if building the report itself fails.
   */
  private setupHealthEndpoint(): void {
    this.app.get('/health', (_req, res) => {
      void this.buildHealthResponse()
        .then((body) => {
          const status = body.healthy ? 200 : 503;
          res.status(status).json(body);
        })
        .catch((err: unknown) => {
          logger.error({ err }, 'Health check failed unexpectedly');
          res.status(500).json({ healthy: false, error: 'Health check error' });
        });
    });
  }

  /**
   * Collect the health report: process memory (in MiB, rounded), number of
   * open SSE sessions, and the result of each plugin's `healthCheck`.
   *
   * A plugin without a `healthCheck` hook counts as healthy; a hook that
   * throws counts as unhealthy with the error message attached. The overall
   * `healthy` flag is false if any plugin is unhealthy or shutdown has begun.
   */
  private async buildHealthResponse(): Promise<Record<string, unknown>> {
    // Memory usage
    const mem = process.memoryUsage();
    const memoryMb = {
      rss: Math.round(mem.rss / 1024 / 1024),
      heapUsed: Math.round(mem.heapUsed / 1024 / 1024),
      heapTotal: Math.round(mem.heapTotal / 1024 / 1024),
      external: Math.round(mem.external / 1024 / 1024),
    };

    // Active SSE sessions
    const activeSessions = this.transports.size;

    // Plugin health checks (run one after another, in registration order)
    const pluginHealth: Record<string, { healthy: boolean; message?: string }> = {};
    let allPluginsHealthy = true;

    for (const plugin of this.plugins) {
      if (plugin.healthCheck) {
        try {
          const result = await plugin.healthCheck();
          pluginHealth[plugin.name] = result;
          if (!result.healthy) {
            allPluginsHealthy = false;
          }
        } catch (err: unknown) {
          const message = err instanceof Error ? err.message : String(err);
          pluginHealth[plugin.name] = { healthy: false, message };
          allPluginsHealthy = false;
        }
      } else {
        pluginHealth[plugin.name] = { healthy: true };
      }
    }

    const healthy = allPluginsHealthy && !this.shuttingDown;

    return {
      healthy,
      version: PACKAGE_VERSION,
      uptime: Math.round(process.uptime()),
      shuttingDown: this.shuttingDown,
      activeSessions,
      plugins: pluginHealth,
      memory: memoryMb,
    };
  }
}
|
||||
@@ -0,0 +1,278 @@
|
||||
import crypto from 'node:crypto';
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
|
||||
import type { Request, Response, NextFunction } from 'express';
|
||||
|
||||
import { config } from '../config/index.js';
|
||||
import { logger } from '../utils/logger.js';
|
||||
import { sanitizeErrorMessage } from '../utils/errors.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Allowed hosts for DNS rebinding protection
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * `Host` header values the DNS rebinding guard accepts: each loopback name,
 * both bare and suffixed with the configured port.
 */
const allowedHosts = new Set<string>(
  ['127.0.0.1', 'localhost'].flatMap((name) => [name, `${name}:${config.port}`]),
);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 1. DNS Rebinding Guard
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Express middleware that only lets a request through when its `Host` header
 * is one of the entries in `allowedHosts`.
 *
 * A missing, empty, or unlisted header is logged as a warning and answered
 * with `403 { error: 'Forbidden' }`; `next()` is not called in that case.
 *
 * @param req  - Incoming request; its `Host` header is inspected.
 * @param res  - Response used for the 403 rejection.
 * @param next - Continuation invoked for permitted requests.
 */
export function dnsRebindingGuard(
  req: Request,
  res: Response,
  next: NextFunction,
): void {
  const hostHeader = req.headers.host;
  const permitted =
    typeof hostHeader === 'string' &&
    hostHeader.length > 0 &&
    allowedHosts.has(hostHeader);

  if (permitted) {
    next();
    return;
  }

  logger.warn(
    { host: hostHeader, ip: req.ip, method: req.method, url: req.originalUrl },
    'DNS rebinding guard: blocked request with disallowed Host header',
  );
  res.status(403).json({ error: 'Forbidden' });
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 2. Shutdown Guard (factory)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Build a middleware that turns requests away while the server is shutting
 * down.
 *
 * The callback is evaluated on every request, so the guard follows the
 * caller's state without being re-created.
 *
 * @param getShuttingDown - Returns `true` while shutdown is in progress.
 * @returns Middleware that answers `503 { error: 'Server is shutting down' }`
 *          during shutdown and calls `next()` otherwise.
 */
export function shutdownGuard(
  getShuttingDown: () => boolean,
): (req: Request, res: Response, next: NextFunction) => void {
  return (_req: Request, res: Response, next: NextFunction): void => {
    const accepting = !getShuttingDown();

    if (accepting) {
      next();
    } else {
      res.status(503).json({ error: 'Server is shutting down' });
    }
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 3. Error Handler
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Express error-handling middleware (four-argument signature).
 *
 * Logs the full error internally and answers with `500 { error }`, where the
 * message has been passed through `sanitizeErrorMessage` before it reaches
 * the client.
 *
 * If the response has already started (headers sent), a status and JSON body
 * can no longer be written; the error is then forwarded to Express's default
 * handler via `next(err)`, which terminates the connection.
 *
 * @param err  - The error raised in the request pipeline.
 * @param req  - The request being processed (used for log context).
 * @param res  - The response to write the sanitized error to.
 * @param next - Continuation used only to delegate after headers were sent.
 */
export function errorHandler(
  err: Error,
  req: Request,
  res: Response,
  next: NextFunction,
): void {
  logger.error(
    { err, method: req.method, url: req.originalUrl },
    'Unhandled error in request pipeline',
  );

  // Writing a status line now would throw ERR_HTTP_HEADERS_SENT.
  if (res.headersSent) {
    next(err);
    return;
  }

  const message = sanitizeErrorMessage(err.message || 'Internal server error');

  res.status(500).json({ error: message });
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 4. Bearer Token Authentication
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** File name of the persisted API token inside `config.cookieDir`. */
const TOKEN_FILENAME = '.api-token';

/**
 * In-memory copy of the API token. Stays `null` until `initBearerToken()`
 * has loaded or generated one.
 */
let cachedToken: string | null = null;
|
||||
|
||||
/**
 * Load or generate the Bearer API token.
 *
 * - If a token is already cached in memory, it is returned immediately.
 * - Otherwise the file `config.cookieDir/.api-token` is read; content of at
 *   least 32 characters (after trimming) is accepted as the token.
 * - Otherwise a random 32-byte token (64 hex characters) is generated and
 *   written to that file with 0o600 permissions.
 * - In both the load and generate cases the token is printed to the console
 *   so the user can copy it.
 *
 * Must be called once during server startup.
 *
 * @returns The active Bearer token.
 * @throws If a new token has to be written and the file cannot be written.
 */
export function initBearerToken(): string {
  if (cachedToken) return cachedToken;

  const tokenPath = path.join(config.cookieDir, TOKEN_FILENAME);

  // With `recursive: true` an already-existing directory is not an error, so
  // anything caught here is a real failure. Keep going (reading or writing
  // the token below will surface it), but do not hide it.
  try {
    fs.mkdirSync(config.cookieDir, { recursive: true, mode: 0o700 });
  } catch (err: unknown) {
    logger.warn({ err }, 'Could not create API token directory');
  }

  // Try to read an existing token.
  let existing = '';
  try {
    existing = fs.readFileSync(tokenPath, 'utf-8').trim();
  } catch {
    // File does not exist or is unreadable — generate a new token.
  }

  if (existing.length >= 32) {
    cachedToken = existing;
    logger.info('API Bearer token loaded from disk');
    // eslint-disable-next-line no-console
    console.log(`\n  REST API Bearer Token: ${cachedToken}\n`);
    return cachedToken;
  }

  // Generate a new token.
  cachedToken = crypto.randomBytes(32).toString('hex');

  fs.writeFileSync(tokenPath, cachedToken + '\n', { mode: 0o600 });

  // `mode` above only applies when the file is newly created. If an invalid
  // token file was overwritten, tighten its permissions explicitly.
  try {
    fs.chmodSync(tokenPath, 0o600);
  } catch (err: unknown) {
    logger.warn({ err }, 'Could not restrict permissions on API token file');
  }

  logger.info('New API Bearer token generated and saved');
  // eslint-disable-next-line no-console
  console.log(`\n  REST API Bearer Token: ${cachedToken}\n`);

  return cachedToken;
}
|
||||
|
||||
/**
 * Express middleware that checks the `Authorization: Bearer <token>` header
 * against the cached API token.
 *
 * Outcomes, in the order they are checked:
 *   - header missing or not starting with `Bearer ` -> 401 `UNAUTHORIZED`
 *   - no token has been initialised                 -> 500 `INTERNAL`
 *   - token differs from the expected one           -> 403 `FORBIDDEN`
 *   - otherwise `next()` is called.
 *
 * The comparison uses `crypto.timingSafeEqual`; lengths are compared first
 * because that function requires equally sized buffers.
 *
 * Apply to `/api/*` routes only.
 */
export function bearerAuth(
  req: Request,
  res: Response,
  next: NextFunction,
): void {
  const scheme = 'Bearer ';

  // Every rejection shares the same response envelope.
  const deny = (status: number, code: string, message: string): void => {
    res.status(status).json({ success: false, error: { code, message } });
  };

  const header = req.headers.authorization;
  if (typeof header !== 'string' || !header.startsWith(scheme)) {
    deny(401, 'UNAUTHORIZED', 'Missing or invalid Authorization header');
    return;
  }

  const expected = cachedToken;
  if (!expected) {
    deny(500, 'INTERNAL', 'API token not initialized');
    return;
  }

  const presentedBytes = Buffer.from(header.slice(scheme.length), 'utf-8');
  const expectedBytes = Buffer.from(expected, 'utf-8');

  const matches =
    presentedBytes.length === expectedBytes.length &&
    crypto.timingSafeEqual(presentedBytes, expectedBytes);

  if (!matches) {
    deny(403, 'FORBIDDEN', 'Invalid Bearer token');
    return;
  }

  next();
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 5. Rate Limiter (in-memory, per-IP)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Configuration accepted by `rateLimiter`. */
interface RateLimiterOptions {
  /** Length of the sliding window, in milliseconds. */
  windowMs: number;
  /** How many requests a single client may make within one window. */
  maxRequests: number;
}
|
||||
|
||||
/** Per-client bookkeeping kept by `rateLimiter`. */
interface RateLimiterEntry {
  /** `Date.now()` values of the requests still inside the current window. */
  timestamps: number[];
}
|
||||
|
||||
/**
 * Build an in-memory, per-IP sliding-window rate limiter.
 *
 * Each client (keyed by `req.ip`, falling back to the socket's remote address
 * and finally the literal `'unknown'`) may make `opts.maxRequests` requests
 * per `opts.windowMs`. Further requests receive `429` with a `Retry-After`
 * header (whole seconds, rounded up) and a `RATE_LIMITED` error body.
 *
 * A background sweep runs every 60 seconds to drop clients with no recent
 * requests; its timer is unref'd so it never keeps the process alive.
 *
 * @param opts - Window length and request budget.
 * @returns Express middleware enforcing the limit.
 */
export function rateLimiter(opts: RateLimiterOptions) {
  const hitsByClient = new Map<string, RateLimiterEntry>();

  /** Predicate factory: is a timestamp still inside the window ending at `now`? */
  const withinWindow = (now: number) => (stamp: number): boolean =>
    now - stamp < opts.windowMs;

  // Periodic sweep of clients whose timestamps have all expired.
  const sweepTimer = setInterval(() => {
    const now = Date.now();
    hitsByClient.forEach((entry, client) => {
      entry.timestamps = entry.timestamps.filter(withinWindow(now));
      if (entry.timestamps.length === 0) {
        hitsByClient.delete(client);
      }
    });
  }, 60_000);

  // The sweep must not hold the event loop open during shutdown.
  if (typeof sweepTimer === 'object' && 'unref' in sweepTimer) {
    sweepTimer.unref();
  }

  return (req: Request, res: Response, next: NextFunction): void => {
    const client = req.ip ?? req.socket.remoteAddress ?? 'unknown';
    const now = Date.now();

    let entry = hitsByClient.get(client);
    if (entry === undefined) {
      entry = { timestamps: [] };
      hitsByClient.set(client, entry);
    }

    // Forget requests that have slid out of the window.
    entry.timestamps = entry.timestamps.filter(withinWindow(now));

    if (entry.timestamps.length < opts.maxRequests) {
      entry.timestamps.push(now);
      next();
      return;
    }

    // Budget exhausted: the client may retry once the oldest request expires.
    const oldest = entry.timestamps[0] ?? now;
    const retryAfterSec = Math.ceil((opts.windowMs - (now - oldest)) / 1000);

    res.set('Retry-After', String(retryAfterSec));
    res.status(429).json({
      success: false,
      error: {
        code: 'RATE_LIMITED',
        message: `Too many requests. Try again in ${String(retryAfterSec)} seconds.`,
      },
    });
  };
}
|
||||
@@ -0,0 +1,242 @@
|
||||
import { open, stat, unlink, writeFile, mkdir } from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { randomUUID } from "node:crypto";
|
||||
|
||||
import { logger } from "./logger.js";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Size limit, in megabytes, applied when the caller does not supply one. */
const DEFAULT_MAX_SIZE_MB = 20;

/** Bytes per megabyte (1024 x 1024). */
const BYTES_PER_MB = 1024 ** 2;

/**
 * Number of leading bytes read for format detection; the longest signature
 * checked (WebP) ends at byte offset 11.
 */
const MAGIC_BYTES_LEN = 12;

/**
 * File extension for each supported Content-Type. Consulted as a fallback
 * when a URL does not carry a usable extension of its own.
 */
const MIME_TO_EXT: Record<string, string> = {
  "image/jpeg": ".jpg",
  "image/png": ".png",
  "image/webp": ".webp",
  "video/mp4": ".mp4",
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Magic-byte detection
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Identify a media format from the first bytes of a file.
 *
 * Signatures are tried in this order:
 *   - JPEG: `FF D8 FF` at offset 0
 *   - PNG:  `89 50 4E 47` at offset 0
 *   - WebP: `"RIFF"` at offset 0 and `"WEBP"` at offset 8
 *   - MP4:  `"ftyp"` at offset 4
 *
 * @param header - Leading bytes of the file. A buffer shorter than a
 *                 signature simply fails that signature.
 * @returns The matching MIME type, or `undefined` when nothing matches.
 */
function detectMimeType(header: Buffer): string | undefined {
  /** True when `signature` occurs byte-for-byte in `header` at `offset`. */
  const matchesAt = (offset: number, signature: readonly number[]): boolean =>
    signature.every((byte, i) => header[offset + i] === byte);

  if (matchesAt(0, [0xff, 0xd8, 0xff])) {
    return "image/jpeg";
  }

  if (matchesAt(0, [0x89, 0x50, 0x4e, 0x47])) {
    return "image/png";
  }

  const isRiff = matchesAt(0, [0x52, 0x49, 0x46, 0x46]);
  if (isRiff && matchesAt(8, [0x57, 0x45, 0x42, 0x50])) {
    return "image/webp";
  }

  if (matchesAt(4, [0x66, 0x74, 0x79, 0x70])) {
    return "video/mp4";
  }

  return undefined;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// validateMediaPath
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Validate that a local media file exists, is within size limits, and is one
 * of the supported media types (JPEG, PNG, WebP, MP4).
 *
 * Checks, in order:
 *   1. the raw input contains no `..` path segment,
 *   2. the path exists and is a regular file,
 *   3. the size does not exceed the limit and is at least `MAGIC_BYTES_LEN`,
 *   4. the leading bytes match a supported format.
 *
 * @param filePath - Path to the file, absolute or relative to the current
 *                   working directory.
 * @param opts     - Optional overrides; `maxSizeMB` replaces the default
 *                   size limit.
 * @returns The resolved absolute path to the file.
 * @throws Error with a descriptive message when any check fails.
 */
export async function validateMediaPath(
  filePath: string,
  opts?: { maxSizeMB?: number },
): Promise<string> {
  // Guard against path traversal: reject the input when any of its
  // *segments* is "..". The raw input is inspected because path.resolve()
  // would silently collapse such segments. Both separator styles are split
  // on, so the check is at least as strict on every platform. A file name
  // that merely contains consecutive dots ("photo..final.jpg") is not a
  // traversal and is allowed.
  const segments = filePath.split(/[\\/]/);
  if (segments.includes("..")) {
    throw new Error(
      `Path traversal detected: the path must not contain ".." segments`,
    );
  }

  const resolved = path.resolve(filePath);

  // Existence & size check
  let stats: Awaited<ReturnType<typeof stat>>;
  try {
    stats = await stat(resolved);
  } catch {
    throw new Error(`File not found: ${resolved}`);
  }

  if (!stats.isFile()) {
    throw new Error(`Not a regular file: ${resolved}`);
  }

  const maxBytes = (opts?.maxSizeMB ?? DEFAULT_MAX_SIZE_MB) * BYTES_PER_MB;
  if (stats.size > maxBytes) {
    const sizeMB = (stats.size / BYTES_PER_MB).toFixed(2);
    const limitMB = (maxBytes / BYTES_PER_MB).toFixed(0);
    throw new Error(
      `File too large: ${sizeMB} MB exceeds the ${limitMB} MB limit`,
    );
  }

  if (stats.size < MAGIC_BYTES_LEN) {
    throw new Error(`File too small to identify media type (${stats.size} bytes)`);
  }

  // MIME type check via magic bytes. The handle is always closed, including
  // when the type is rejected.
  const fd = await open(resolved, "r");
  try {
    const buf = Buffer.alloc(MAGIC_BYTES_LEN);
    await fd.read(buf, 0, MAGIC_BYTES_LEN, 0);
    const mime = detectMimeType(buf);

    if (mime === undefined) {
      throw new Error(
        `Unsupported media type for file: ${resolved}. ` +
          `Supported types: JPEG, PNG, WebP, MP4`,
      );
    }

    logger.debug({ path: resolved, mime, bytes: stats.size }, "媒体文件校验通过");
  } finally {
    await fd.close();
  }

  return resolved;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// downloadFile
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Pick a file extension for a download.
 *
 * The extension of the URL's pathname wins when it is non-empty and at most
 * five characters long (dot included); it is returned lower-cased. Otherwise
 * the Content-Type's base MIME type (the part before any `;`) is looked up in
 * `MIME_TO_EXT`.
 *
 * @param url         - The download URL; an unparsable URL is tolerated.
 * @param contentType - The response's Content-Type header, if any.
 * @returns The extension including its leading dot, or `""` when neither
 *          source yields one.
 */
function deriveExtension(url: string, contentType: string | null): string {
  // Preferred source: the URL pathname.
  let urlExt = "";
  try {
    urlExt = path.extname(new URL(url).pathname).toLowerCase();
  } catch {
    // URL parsing failed -- fall through to Content-Type.
  }
  if (urlExt !== "" && urlExt.length <= 5) {
    return urlExt;
  }

  // Fallback: the Content-Type header, stripped of parameters.
  const baseMime = contentType?.split(";")[0]?.trim().toLowerCase();
  const mapped = baseMime ? MIME_TO_EXT[baseMime] : undefined;

  return mapped || "";
}
|
||||
|
||||
/**
 * Fetch a remote file and store it in `destDir` under a random UUID name.
 *
 * The directory is created (recursively) when missing. The extension comes
 * from `deriveExtension`, and the file is written with 0o600 permissions.
 * The whole response body is buffered in memory before it is written.
 *
 * NOTE(review): no timeout or size cap is applied to the request here --
 * confirm that callers only pass trusted or otherwise bounded URLs.
 *
 * @param url     - Remote URL to download.
 * @param destDir - Directory that receives the file.
 * @returns The absolute path to the downloaded file.
 * @throws Error when the response status is not OK; errors from `fetch` and
 *         the filesystem calls propagate unchanged.
 */
export async function downloadFile(
  url: string,
  destDir: string,
): Promise<string> {
  const targetDir = path.resolve(destDir);
  await mkdir(targetDir, { recursive: true });

  logger.debug({ url, destDir: targetDir }, "开始下载文件");

  const res = await fetch(url);
  if (!res.ok) {
    const status = String(res.status);
    throw new Error(
      `Download failed: HTTP ${status} ${res.statusText} for ${url}`,
    );
  }

  const mime = res.headers.get("content-type");
  const destPath = path.join(
    targetDir,
    randomUUID() + deriveExtension(url, mime),
  );

  const payload = Buffer.from(await res.arrayBuffer());
  await writeFile(destPath, payload, { mode: 0o600 });

  logger.debug({ path: destPath, bytes: payload.length, mime }, "文件下载完成");

  return destPath;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// cleanupFile
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Remove a local file, treating "already gone" as success.
 *
 * @param filePath - Path of the file to delete.
 * @throws Any failure other than `ENOENT` is rethrown unchanged.
 */
export async function cleanupFile(filePath: string): Promise<void> {
  try {
    await unlink(filePath);
    logger.debug({ path: filePath }, "临时文件已清理");
  } catch (err: unknown) {
    // A missing file means there is nothing left to clean up.
    const alreadyGone =
      err instanceof Error && "code" in err && err.code === "ENOENT";

    if (!alreadyGone) {
      throw err;
    }
  }
}
|
||||
@@ -0,0 +1,136 @@
|
||||
import { logger } from './logger.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Error classification
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Coarse failure categories attached to tool error responses.
 *
 * `classifyError` below only ever returns TIMEOUT, AUTH_REQUIRED,
 * SELECTOR_NOT_FOUND, NETWORK or INTERNAL; PLATFORM_ERROR is available for
 * callers that assign a category themselves.
 */
export enum ErrorCategory {
  TIMEOUT = 'TIMEOUT',
  AUTH_REQUIRED = 'AUTH_REQUIRED',
  SELECTOR_NOT_FOUND = 'SELECTOR_NOT_FOUND',
  NETWORK = 'NETWORK',
  PLATFORM_ERROR = 'PLATFORM_ERROR',
  INTERNAL = 'INTERNAL',
}

// All markers are lower-case because they are matched against a lower-cased haystack.
const SELECTOR_MARKERS: readonly string[] = ['waiting for selector', '找不到元素'];
// 'timeout' also covers the error name "TimeoutError" once lower-cased.
const TIMEOUT_MARKERS: readonly string[] = ['timeout', 'timed out', 'etimedout'];
// 'net::err_' is Chromium's prefix; the rest cover Node's fetch and socket errors.
const NETWORK_MARKERS: readonly string[] = [
  'net::err_',
  'fetch failed',
  'econnrefused',
  'econnreset',
  'enotfound',
  'eai_again',
  'ehostunreach',
];
const AUTH_MARKERS: readonly string[] = ['login', '登录'];

/**
 * Classify an error by looking for known substrings in its `name` and
 * `message` (case-insensitively).
 *
 * Checks run from most to least specific, and the first match wins:
 * selector, then timeout, then network, then auth. Selector must precede
 * timeout because a message such as "Timeout waiting for selector ..."
 * contains both keywords.
 *
 * @param err - The error to classify.
 * @returns The matching category, or `ErrorCategory.INTERNAL` when nothing matches.
 */
export function classifyError(err: Error): ErrorCategory {
  const haystack = `${err.name} ${err.message}`.toLowerCase();
  const mentionsAny = (markers: readonly string[]): boolean =>
    markers.some((marker) => haystack.includes(marker));

  if (mentionsAny(SELECTOR_MARKERS)) {
    return ErrorCategory.SELECTOR_NOT_FOUND;
  }
  if (mentionsAny(TIMEOUT_MARKERS)) {
    return ErrorCategory.TIMEOUT;
  }
  if (mentionsAny(NETWORK_MARKERS)) {
    return ErrorCategory.NETWORK;
  }
  if (mentionsAny(AUTH_MARKERS)) {
    return ErrorCategory.AUTH_REQUIRED;
  }
  return ErrorCategory.INTERNAL;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Message sanitization
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Strip potentially sensitive or noisy information from an error message
 * before it is returned to the MCP client.
 *
 * Replacements, applied in this order:
 * - URLs (`http://` / `https://`, any letter case)  -> `[url]`
 * - Windows absolute paths (`C:\dir\file`)          -> `[path]`
 * - Unix paths with at least two segments (`/a/b`)  -> `[path]`
 * - Hexadecimal runs of 32 or more characters       -> `[hash]`
 *
 * The result is then truncated to at most 200 characters.
 *
 * @param message - Raw error message.
 * @returns The redacted, length-limited message.
 */
export function sanitizeErrorMessage(message: string): string {
  const maxLength = 200;

  const sanitized = message
    // URLs go first so the path pattern cannot partially match them.
    // The `i` flag also catches upper-case schemes such as "HTTP://host".
    .replace(/https?:\/\/[^\s)'"]+/gi, '[url]')
    // Windows drive-letter paths.
    .replace(/\b[A-Za-z]:\\(?:[^\s\\]+\\)*[^\s\\)'":]*/g, '[path]')
    // Unix-style absolute paths.
    .replace(/\/(?:[^\s/]+\/)+[^\s/)'":]*/g, '[path]')
    // Session ids, hashes, tokens and similar long hex strings.
    .replace(/[0-9a-fA-F]{32,}/g, '[hash]');

  return sanitized.length > maxLength ? sanitized.slice(0, maxLength) : sanitized;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// MCP tool result type
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Result object produced by MCP tool handlers in this module.
 *
 * - `content`: text parts making up the response.
 * - `isError`: optional flag; `withErrorHandling` sets it to `true` when the
 *   wrapped handler throws.
 */
export type McpToolResult = {
  content: Array<{ type: 'text'; text: string }>;
  isError?: boolean;
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Error-handling wrapper
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Run an MCP tool handler and turn anything it throws into a structured MCP
 * error result instead of letting the rejection propagate.
 *
 * On failure the thrown value is normalised to an `Error`, classified with
 * `classifyError`, logged in full through `logger.error`, and returned to the
 * client as a JSON text payload `{ tool, error, message }`. `message` has
 * been passed through `sanitizeErrorMessage`.
 *
 * Usage:
 * ```ts
 * const result = await withErrorHandling('publish_post', async () => {
 *   // ... tool logic that returns McpToolResult
 * });
 * ```
 *
 * @param toolName - Tool name, echoed in the log entry and the error payload.
 * @param fn - The handler to execute.
 * @returns The handler's own result, or an `isError: true` result on failure.
 */
export async function withErrorHandling(
  toolName: string,
  fn: () => Promise<McpToolResult>,
): Promise<McpToolResult> {
  try {
    return await fn();
  } catch (caught: unknown) {
    const err =
      caught instanceof Error ? caught : new Error(describeThrown(caught));

    const category = classifyError(err);
    const sanitized = sanitizeErrorMessage(err.message);

    // The log entry keeps the original error; only the client payload is sanitized.
    logger.error({ tool: toolName, category, err }, '工具执行失败');

    const payload = JSON.stringify({
      tool: toolName,
      error: category,
      message: sanitized,
    });

    return {
      content: [{ type: 'text', text: payload }],
      isError: true,
    };
  }
}

/**
 * Produce a readable message for a thrown value that is not an `Error`.
 * Plain `String(value)` would turn every object into "[object Object]".
 */
function describeThrown(value: unknown): string {
  if (typeof value === 'string') {
    return value;
  }
  if (typeof value === 'object' && value !== null) {
    if ('message' in value && typeof value.message === 'string') {
      return value.message;
    }
    try {
      const json = JSON.stringify(value);
      if (json !== undefined) {
        return json;
      }
    } catch {
      // Not serialisable (for example a circular structure): fall through.
    }
  }
  return String(value);
}
|
||||
@@ -0,0 +1,68 @@
|
||||
import pino from "pino";
|
||||
|
||||
// Production mode is selected solely by NODE_ENV=production.
const isProduction = process.env["NODE_ENV"] === "production";

// In production, drop DEBUG to suppress Playwright debug output that bypasses pino.
if (isProduction) {
  Reflect.deleteProperty(process.env, "DEBUG");
}
|
||||
|
||||
// Property names whose values must never reach the log output.
const SENSITIVE_KEYS: readonly string[] = [
  // Auth & credentials
  "cookie",
  "cookies",
  "set-cookie",
  "authorization",
  "password",
  "secret",

  // Tokens
  "token",
  "xsec_token",
  "access_token",
  "refresh_token",

  // API keys
  "api_key",
  "apikey",

  // Sessions
  "sessionid",
  "session_id",
];

// Playwright StorageState structures.
const SENSITIVE_STRUCTURES: readonly string[] = [
  "cookies[*].value",
  "origins[*].localStorage[*].value",
];

// Number of leading wildcard levels generated per key (0 = top level only).
// Depth 2 reaches shapes such as `req.headers.cookie`.
const MAX_REDACT_DEPTH = 2;

/**
 * Expand key names into pino redact paths.
 *
 * pino's path syntax has no recursive `**` wildcard; `*` matches exactly one
 * level. "Deep" redaction is therefore approximated by emitting each key at
 * every depth from 0 to `maxDepth` (`key`, `*.key`, `*.*.key`, ...). Keys
 * that are not valid identifiers (for example `set-cookie`) are written in
 * bracket notation.
 */
function buildRedactPaths(
  keys: readonly string[],
  structures: readonly string[],
  maxDepth: number,
): string[] {
  const identifier = /^[A-Za-z_$][\w$]*$/;
  const paths: string[] = [];

  for (let depth = 0; depth <= maxDepth; depth++) {
    const prefix = Array.from({ length: depth }, () => "*").join(".");

    for (const key of keys) {
      if (identifier.test(key)) {
        paths.push(prefix === "" ? key : `${prefix}.${key}`);
      } else {
        // Bracket segments attach directly to the prefix: *["set-cookie"].
        paths.push(`${prefix}["${key}"]`);
      }
    }

    for (const structure of structures) {
      paths.push(prefix === "" ? structure : `${prefix}.${structure}`);
    }
  }

  return paths;
}

const redactPaths: string[] = buildRedactPaths(
  SENSITIVE_KEYS,
  SENSITIVE_STRUCTURES,
  MAX_REDACT_DEPTH,
);
|
||||
|
||||
/**
 * Serializer used for the `err` / `error` log keys.
 *
 * Emits the constructor name as `type` (falling back to "Error") and the
 * message. The stack is included only outside production and only when the
 * error has a non-empty one.
 */
const errorSerializer = (err: Error): Record<string, unknown> => {
  const type = err.constructor?.name ?? "Error";
  const base: Record<string, unknown> = { type, message: err.message };

  const includeStack = !isProduction && Boolean(err.stack);
  return includeStack ? { ...base, stack: err.stack } : base;
};
|
||||
|
||||
// Outside production, logs go through the "pino-pretty" transport.
// In production no transport option is passed at all.
const devOnlyOptions = isProduction
  ? {}
  : { transport: { target: "pino-pretty" } };

/**
 * Shared application logger.
 *
 * Level comes from LOG_LEVEL (default "info"). Values at `redactPaths` are
 * replaced with "[REDACTED]", and `err` / `error` keys are passed through
 * `errorSerializer`.
 */
export const logger: pino.Logger = pino({
  level: process.env["LOG_LEVEL"] ?? "info",
  redact: { paths: redactPaths, censor: "[REDACTED]" },
  serializers: { err: errorSerializer, error: errorSerializer },
  ...devOnlyOptions,
});
|
||||
@@ -0,0 +1,160 @@
|
||||
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
|
||||
import fs from 'node:fs/promises';
|
||||
import os from 'node:os';
|
||||
import path from 'node:path';
|
||||
import crypto from 'node:crypto';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mocks
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// We need to mock config BEFORE importing CookieStore, because the module
// reads config.cookieDir at import time.

// Assigned in beforeEach. The getter in the config mock below returns the
// current value on every access, so each test sees its own directory.
let testDir: string;

vi.mock('../src/config/index.js', () => ({
  config: {
    get cookieDir() {
      return testDir;
    },
  },
}));

// Logger stub: every level is a vi.fn(), and child() returns another object
// with the same four level spies.
vi.mock('../src/utils/logger.js', () => ({
  logger: {
    error: vi.fn(),
    warn: vi.fn(),
    info: vi.fn(),
    debug: vi.fn(),
    child: vi.fn(() => ({
      error: vi.fn(),
      warn: vi.fn(),
      info: vi.fn(),
      debug: vi.fn(),
    })),
  },
}));
|
||||
|
||||
// Import AFTER mocks are declared.
|
||||
import { CookieStore, type StorageState } from '../src/cookie/store.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Build a StorageState fixture with `cookieCount` cookies and no origins.
 *
 * Cookies are named `cookie_<i>` with value `value_<i>`, and expire one hour
 * after the moment each one is created.
 */
function makeStorageState(cookieCount = 1): StorageState {
  const buildCookie = (index: number) => ({
    name: `cookie_${index}`,
    value: `value_${index}`,
    domain: '.example.com',
    path: '/',
    expires: Date.now() / 1000 + 3600,
    httpOnly: true,
    secure: true,
    sameSite: 'Lax' as const,
  });

  const cookies = Array.from({ length: cookieCount }, (_unused, index) =>
    buildCookie(index),
  );

  return { cookies, origins: [] };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// CookieStore tests run against a real temporary directory on disk.
describe('CookieStore', () => {
  let store: CookieStore;

  // Each test gets a fresh, uniquely named directory under the OS temp dir.
  beforeEach(async () => {
    testDir = path.join(os.tmpdir(), `cookie-store-test-${crypto.randomUUID()}`);
    await fs.mkdir(testDir, { recursive: true });
    store = new CookieStore();
  });

  afterEach(async () => {
    await fs.rm(testDir, { recursive: true, force: true });
  });

  // -- save ---------------------------------------------------------------

  it('save creates the platform directory', async () => {
    const state = makeStorageState();
    await store.save('twitter', state);

    const dirStat = await fs.stat(path.join(testDir, 'twitter'));
    expect(dirStat.isDirectory()).toBe(true);
  });

  it('save writes the cookie file with restricted permissions (0o600)', async () => {
    const state = makeStorageState();
    await store.save('twitter', state);

    const filePath = store.getPath('twitter');
    const fileStat = await fs.stat(filePath);

    // On Unix-like systems the mode includes the file type bits; mask to
    // the permission bits only.
    const perms = fileStat.mode & 0o777;
    expect(perms).toBe(0o600);
  });

  // -- load ---------------------------------------------------------------

  it('load returns saved data', async () => {
    const state = makeStorageState(3);
    await store.save('instagram', state);

    const loaded = await store.load('instagram');
    expect(loaded).not.toBeNull();
    expect(loaded!.cookies).toHaveLength(3);
    expect(loaded!.cookies[0]!.name).toBe('cookie_0');
  });

  it('load returns null for non-existent platform', async () => {
    const loaded = await store.load('nonexistent');
    expect(loaded).toBeNull();
  });

  // -- delete -------------------------------------------------------------

  it('delete removes the cookie file', async () => {
    const state = makeStorageState();
    await store.save('weibo', state);

    // Verify the file exists first.
    const filePath = store.getPath('weibo');
    await expect(fs.access(filePath)).resolves.toBeUndefined();

    await store.delete('weibo');

    // After deletion the file should no longer exist.
    await expect(fs.access(filePath)).rejects.toThrow();
  });

  it('delete succeeds silently for a non-existent file', async () => {
    // Should not throw even though no file was ever saved.
    await expect(store.delete('ghost')).resolves.toBeUndefined();
  });

  // -- atomic write -------------------------------------------------------

  it('save uses atomic write (temp file renamed to final path)', async () => {
    const state = makeStorageState();

    // Spy on fs.rename to verify it is called.
    const renameSpy = vi.spyOn(fs, 'rename');

    try {
      await store.save('atomic-test', state);

      expect(renameSpy).toHaveBeenCalledTimes(1);

      const [tmpArg, finalArg] = renameSpy.mock.calls[0]!;
      expect(String(tmpArg)).toContain('.tmp.');
      expect(String(finalArg)).toBe(store.getPath('atomic-test'));
    } finally {
      // Restore even when an assertion fails, so the spy cannot leak into
      // other tests sharing this module instance.
      renameSpy.mockRestore();
    }
  });
});
|
||||
@@ -0,0 +1,149 @@
|
||||
import { describe, it, expect, vi } from 'vitest';
|
||||
|
||||
// Mock the logger before importing the module under test.
// The stub exposes error/warn/info/debug as vi.fn() spies, and child()
// returns a new object with the same four spies.
vi.mock('../src/utils/logger.js', () => ({
  logger: {
    error: vi.fn(),
    warn: vi.fn(),
    info: vi.fn(),
    debug: vi.fn(),
    child: vi.fn(() => ({
      error: vi.fn(),
      warn: vi.fn(),
      info: vi.fn(),
      debug: vi.fn(),
    })),
  },
}));
|
||||
|
||||
import {
|
||||
classifyError,
|
||||
sanitizeErrorMessage,
|
||||
withErrorHandling,
|
||||
ErrorCategory,
|
||||
} from '../src/utils/errors.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// classifyError
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// One case per category that classifyError can return.
describe('classifyError', () => {
  it('returns TIMEOUT when error name is "TimeoutError"', () => {
    const timeoutByName = Object.assign(new Error('something happened'), {
      name: 'TimeoutError',
    });
    expect(classifyError(timeoutByName)).toBe(ErrorCategory.TIMEOUT);
  });

  it('returns TIMEOUT when message contains "timeout"', () => {
    const category = classifyError(new Error('Connection timeout after 30s'));
    expect(category).toBe(ErrorCategory.TIMEOUT);
  });

  it('returns NETWORK when message contains "net::err_"', () => {
    const category = classifyError(new Error('net::err_connection_refused'));
    expect(category).toBe(ErrorCategory.NETWORK);
  });

  it('returns AUTH_REQUIRED when message contains "login"', () => {
    const category = classifyError(new Error('Please login to continue'));
    expect(category).toBe(ErrorCategory.AUTH_REQUIRED);
  });

  it('returns AUTH_REQUIRED when message contains Chinese login word', () => {
    const category = classifyError(new Error('请先登录'));
    expect(category).toBe(ErrorCategory.AUTH_REQUIRED);
  });

  it('returns SELECTOR_NOT_FOUND when message contains "waiting for selector"', () => {
    // The message also contains "Timeout"; the selector check must win.
    const category = classifyError(
      new Error('Timeout waiting for selector "#submit-btn"'),
    );
    expect(category).toBe(ErrorCategory.SELECTOR_NOT_FOUND);
  });

  it('returns INTERNAL for unrecognised errors', () => {
    const category = classifyError(new Error('Something unexpected happened'));
    expect(category).toBe(ErrorCategory.INTERNAL);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// sanitizeErrorMessage
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Covers each redaction rule plus truncation and the pass-through case.
describe('sanitizeErrorMessage', () => {
  it('replaces absolute file-system paths with [path]', () => {
    const cleaned = sanitizeErrorMessage(
      'Failed to read /home/user/data/secrets.json',
    );
    expect(cleaned).toContain('[path]');
    expect(cleaned).not.toContain('/home/user/data/secrets.json');
  });

  it('replaces URLs with [url]', () => {
    const cleaned = sanitizeErrorMessage(
      'Fetch failed for https://api.example.com/v1/token',
    );
    expect(cleaned).toContain('[url]');
    expect(cleaned).not.toContain('https://api.example.com');
  });

  it('replaces long hex strings (>=32 chars) with [hash]', () => {
    const sessionId = 'a'.repeat(32);
    const cleaned = sanitizeErrorMessage(`Invalid session id: ${sessionId}`);
    expect(cleaned).toContain('[hash]');
    expect(cleaned).not.toContain(sessionId);
  });

  it('truncates messages longer than 200 characters', () => {
    const cleaned = sanitizeErrorMessage('x'.repeat(300));
    expect(cleaned.length).toBe(200);
  });

  it('leaves short plain messages unchanged', () => {
    const plain = 'Something went wrong';
    expect(sanitizeErrorMessage(plain)).toBe(plain);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// withErrorHandling
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Covers the success pass-through and both failure shapes (Error and non-Error).
describe('withErrorHandling', () => {
  it('passes through successful results', async () => {
    const okResult = {
      content: [{ type: 'text' as const, text: 'ok' }],
    };

    const outcome = await withErrorHandling('test_tool', async () => okResult);

    expect(outcome).toEqual(okResult);
    expect(outcome.isError).toBeUndefined();
  });

  it('returns isError:true with classified error JSON on failure', async () => {
    const failing = async (): Promise<never> => {
      throw new Error('Connection timeout after 30s');
    };

    const outcome = await withErrorHandling('publish_post', failing);

    expect(outcome.isError).toBe(true);
    expect(outcome.content).toHaveLength(1);

    const body = JSON.parse(outcome.content[0]!.text);
    expect(body.tool).toBe('publish_post');
    expect(body.error).toBe(ErrorCategory.TIMEOUT);
    expect(typeof body.message).toBe('string');
  });

  it('wraps non-Error throws into an Error', async () => {
    const failing = async (): Promise<never> => {
      throw 'raw string error';
    };

    const outcome = await withErrorHandling('my_tool', failing);

    expect(outcome.isError).toBe(true);

    const body = JSON.parse(outcome.content[0]!.text);
    expect(body.tool).toBe('my_tool');
    expect(body.error).toBe(ErrorCategory.INTERNAL);
    expect(body.message).toContain('raw string error');
  });
});
|
||||
@@ -0,0 +1,153 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mocks -- must be declared before importing the module under test.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// The middleware module reads `config.port` at module scope to build the
// allowedHosts set, so we need the mock in place before the import.
// Port 3000 is what the "<PORT>" host cases in this file use.
vi.mock('../src/config/index.js', () => ({
  config: {
    port: 3000,
  },
}));

// Logger stub: every level is a vi.fn(), and child() returns another object
// with the same four level spies.
vi.mock('../src/utils/logger.js', () => ({
  logger: {
    error: vi.fn(),
    warn: vi.fn(),
    info: vi.fn(),
    debug: vi.fn(),
    child: vi.fn(() => ({
      error: vi.fn(),
      warn: vi.fn(),
      info: vi.fn(),
      debug: vi.fn(),
    })),
  },
}));

// sanitizeErrorMessage is replaced by an identity function, so messages pass
// through unchanged in these tests.
vi.mock('../src/utils/errors.js', () => ({
  sanitizeErrorMessage: vi.fn((msg: string) => msg),
}));
|
||||
|
||||
import { dnsRebindingGuard } from '../src/server/middleware.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers -- lightweight Express req/res/next fakes
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Minimal stand-in for the parts of an Express request these tests populate. */
interface FakeRequest {
  headers: Record<string, string | undefined>;
  ip?: string;
  method?: string;
  originalUrl?: string;
}

/** Minimal stand-in for an Express response; records the status code and JSON body. */
interface FakeResponse {
  statusCode: number;
  body: unknown;
  status: (code: number) => FakeResponse;
  json: (data: unknown) => FakeResponse;
}

/**
 * Build a fake GET /test request from 127.0.0.1.
 * The `host` header is set only when `host` is provided.
 */
function createReq(host?: string): FakeRequest {
  const headers: Record<string, string | undefined> = {};
  if (host !== undefined) {
    headers['host'] = host;
  }

  return {
    headers,
    ip: '127.0.0.1',
    method: 'GET',
    originalUrl: '/test',
  };
}

/**
 * Build a fake response that starts at status 200 with no body.
 * `status()` and `json()` record their argument and return the same object,
 * so calls can be chained.
 */
function createRes(): FakeResponse {
  const fake: FakeResponse = {
    statusCode: 200,
    body: undefined,
    status: (code: number) => {
      fake.statusCode = code;
      return fake;
    },
    json: (data: unknown) => {
      fake.body = data;
      return fake;
    },
  };
  return fake;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// dnsRebindingGuard
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Loopback hosts (with or without the configured port) must reach next();
// any other Host value, or a missing Host header, must get a 403 JSON reply.
describe('dnsRebindingGuard', () => {
  let next: ReturnType<typeof vi.fn>;

  beforeEach(() => {
    next = vi.fn();
  });

  // Runs the guard against `req` with a fresh fake response and returns that response.
  const runGuard = (req: FakeRequest): FakeResponse => {
    const res = createRes();
    dnsRebindingGuard(req as any, res as any, next);
    return res;
  };

  it('allows requests with Host: 127.0.0.1', () => {
    const res = runGuard(createReq('127.0.0.1'));

    expect(next).toHaveBeenCalledTimes(1);
    expect(res.statusCode).toBe(200);
  });

  it('allows requests with Host: localhost', () => {
    runGuard(createReq('localhost'));

    expect(next).toHaveBeenCalledTimes(1);
  });

  it('allows requests with Host: localhost:<PORT>', () => {
    runGuard(createReq('localhost:3000'));

    expect(next).toHaveBeenCalledTimes(1);
  });

  it('allows requests with Host: 127.0.0.1:<PORT>', () => {
    runGuard(createReq('127.0.0.1:3000'));

    expect(next).toHaveBeenCalledTimes(1);
  });

  it('blocks requests with Host: evil.com', () => {
    const res = runGuard(createReq('evil.com'));

    expect(next).not.toHaveBeenCalled();
    expect(res.statusCode).toBe(403);
    expect(res.body).toEqual({ error: 'Forbidden' });
  });

  it('blocks requests with no Host header', () => {
    // createReq() without an argument yields an empty headers object.
    const res = runGuard(createReq());

    expect(next).not.toHaveBeenCalled();
    expect(res.statusCode).toBe(403);
    expect(res.body).toEqual({ error: 'Forbidden' });
  });
});
|
||||
@@ -0,0 +1,23 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2022",
|
||||
"module": "ESNext",
|
||||
"moduleResolution": "bundler",
|
||||
"lib": ["ES2022"],
|
||||
"outDir": "dist",
|
||||
"rootDir": "src",
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"resolveJsonModule": true,
|
||||
"declaration": true,
|
||||
"declarationMap": true,
|
||||
"sourceMap": true,
|
||||
"noUncheckedIndexedAccess": true,
|
||||
"noUnusedLocals": true,
|
||||
"noUnusedParameters": true
|
||||
},
|
||||
"include": ["src"],
|
||||
"exclude": ["node_modules", "dist", "**/*.test.ts"]
|
||||
}
|
||||
@@ -0,0 +1,13 @@
|
||||
import { defineConfig } from 'tsup';
|
||||
|
||||
// tsup build configuration: one entry point (src/index.ts), ESM output
// targeting node22, written to dist/ with source maps. Declaration output,
// code splitting and shims are all switched off.
export default defineConfig({
  entry: ['src/index.ts'],
  format: ['esm'],
  target: 'node22',
  outDir: 'dist',
  clean: true,
  sourcemap: true,
  dts: false,
  splitting: false,
  shims: false,
});
|
||||
@@ -0,0 +1,8 @@
|
||||
import { defineConfig } from 'vitest/config';
|
||||
|
||||
// Vitest configuration: pick up every *.test.ts file under test/ and run it
// in the 'node' environment.
export default defineConfig({
  test: {
    include: ['test/**/*.test.ts'],
    environment: 'node',
  },
});
|
||||
Reference in New Issue
Block a user