feat: social-mcp 初始实现
多平台社交自动化 MCP 服务,首批支持小红书。 - 13 个 MCP 工具:登录管理、内容浏览、发布、互动 - 13 个 REST API 端点,支持 Bearer token 认证和限流 - BrowserManager:串行队列、背压、崩溃恢复 - Cookie 持久化:原子写入、0600 权限 - 安全:DNS rebinding 防御、错误脱敏、深层日志 redact - Docker 部署支持 - 28 个单元测试全部通过
This commit is contained in:
@@ -0,0 +1,374 @@
|
||||
import {
|
||||
chromium,
|
||||
type Browser,
|
||||
type BrowserContext,
|
||||
type Page,
|
||||
} from 'rebrowser-playwright';
|
||||
|
||||
import { config } from '../config/index.js';
|
||||
import { cookieStore } from '../cookie/store.js';
|
||||
import { logger } from '../utils/logger.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Upper bound, in milliseconds, on how long a page handed out by
 * `acquirePage()` may stay open before it is released automatically
 * (5 minutes).
 */
const ACQUIRE_SAFETY_TIMEOUT_MS = 300_000;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// BrowserManager
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Owns the shared Chromium instance and one `BrowserContext` per platform.
 *
 * - The browser is launched lazily; concurrent callers share one launch.
 * - `withPage()` operations are serialised per platform through a promise
 *   chain and rejected once `config.maxQueueDepth` operations are pending.
 * - A new context is seeded from the CookieStore; `saveCookies()` writes the
 *   current state back.
 * - When the managed browser disconnects, the browser reference and all
 *   contexts are dropped so the next operation launches a fresh browser.
 */
export class BrowserManager {
  // -- State ----------------------------------------------------------------

  /** The managed browser, or `null` before launch / after disconnect / after close. */
  private browser: Browser | null = null;

  /** Lazily created context per platform (see `getContext`). */
  private contexts = new Map<string, BrowserContext>();

  /** Tail of each platform's operation chain; these promises never reject. */
  private queues = new Map<string, Promise<void>>();

  /** Number of queued or running `withPage()` operations per platform. */
  private queueDepths = new Map<string, number>();

  /**
   * Mutex-style promise that prevents concurrent browser launches.
   * While a launch is in-flight every caller awaits the same promise.
   */
  private launchPromise: Promise<Browser> | null = null;

  // -- Public API -----------------------------------------------------------

  /**
   * Execute `fn` on a fresh page inside the platform-specific context.
   *
   * Operations are serialised per-platform through a promise chain, so
   * `withPage` callbacks for the same platform never overlap. Back-pressure
   * is enforced via `config.maxQueueDepth`.
   *
   * `fn` is raced against a hard timeout. When the timeout wins, the returned
   * promise rejects and the page is closed; `fn` itself is not cancelled.
   *
   * @param platform - Platform identifier (e.g. "twitter", "xiaohongshu").
   * @param fn - Async callback that receives the page.
   * @param timeoutMs - Optional timeout override (defaults to the
   *   `operationTimeouts.default` value from config, then 60 s).
   * @returns The value returned by `fn`.
   * @throws Error when the platform queue is full, the browser is not
   *   connected when the task starts, or the operation times out; any error
   *   thrown by `fn` is propagated unchanged.
   */
  async withPage<T>(
    platform: string,
    fn: (page: Page) => Promise<T>,
    timeoutMs?: number,
  ): Promise<T> {
    // -- Back-pressure check ------------------------------------------------
    const currentDepth = this.queueDepths.get(platform) ?? 0;
    if (currentDepth >= config.maxQueueDepth) {
      throw new Error(
        `Queue full for platform "${platform}" (depth=${currentDepth}, ` +
          `max=${config.maxQueueDepth}). Try again later.`,
      );
    }
    this.queueDepths.set(platform, currentDepth + 1);

    // -- Resolve effective timeout ------------------------------------------
    const effectiveTimeout =
      timeoutMs ?? config.operationTimeouts['default'] ?? 60_000;

    // -- Build the task and chain onto the per-platform queue ---------------
    const previous = this.queues.get(platform) ?? Promise.resolve();

    const task: Promise<T> = previous.then(async () => {
      const browser = await this.ensureBrowser();

      // The browser may have disconnected while this task was queued.
      if (!browser.isConnected()) {
        throw new Error('Browser disconnected while waiting in queue');
      }

      const ctx = await this.getContext(platform);
      const page = await ctx.newPage();

      page.setDefaultTimeout(effectiveTimeout);
      page.setDefaultNavigationTimeout(effectiveTimeout);

      const timeout = this.createTimeout<T>(effectiveTimeout, platform);
      try {
        // Race the user function against the hard timeout.
        return await Promise.race<T>([fn(page), timeout.promise]);
      } finally {
        // Always cancel the timer so finished operations do not leave a
        // pending timer behind for the rest of the timeout window.
        timeout.cancel();
        await page.close().catch((err: unknown) => {
          logger.warn({ err, platform }, 'Failed to close page');
        });
      }
    });

    // Swallow errors so the promise chain continues for the next caller.
    // The actual rejection is still returned to **this** caller via `task`.
    const chainContinuation = task.then(
      () => {},
      () => {},
    );

    // Decrement queue depth when this task settles, regardless of outcome.
    void chainContinuation.finally(() => {
      const depth = this.queueDepths.get(platform) ?? 1;
      if (depth <= 1) {
        this.queueDepths.delete(platform);
      } else {
        this.queueDepths.set(platform, depth - 1);
      }
    });

    this.queues.set(platform, chainContinuation);

    return task;
  }

  /**
   * Acquire a page that the caller manages manually (e.g. for interactive
   * login flows). The caller **must** call `release()` when finished.
   *
   * Unlike `withPage()`, this does not go through the per-platform queue or
   * the back-pressure check.
   *
   * A safety-net timer auto-releases the page after
   * `ACQUIRE_SAFETY_TIMEOUT_MS` to prevent resource leaks if the caller
   * forgets.
   *
   * @param platform - Platform identifier.
   * @returns Object with `page` and an idempotent `release` function.
   */
  async acquirePage(
    platform: string,
  ): Promise<{ page: Page; release: () => Promise<void> }> {
    await this.ensureBrowser();
    const ctx = await this.getContext(platform);
    const page = await ctx.newPage();

    let released = false;

    const release = async (): Promise<void> => {
      if (released) return;
      released = true;
      clearTimeout(safetyTimer);
      await page.close().catch((err: unknown) => {
        logger.warn({ err, platform }, 'Failed to close acquired page');
      });
    };

    const safetyTimer = setTimeout(() => {
      if (!released) {
        logger.warn(
          { platform },
          `acquirePage safety timeout: auto-releasing page after ${ACQUIRE_SAFETY_TIMEOUT_MS}ms`,
        );
        void release();
      }
    }, ACQUIRE_SAFETY_TIMEOUT_MS);

    // Prevent the timer from keeping the Node.js process alive.
    if (typeof safetyTimer === 'object' && 'unref' in safetyTimer) {
      safetyTimer.unref();
    }

    return { page, release };
  }

  /**
   * Save the current cookie / storage state of a platform's browser context
   * to disk via the CookieStore. Logs a warning and does nothing when no
   * context exists for the platform.
   *
   * @param platform - Platform identifier whose context should be persisted.
   */
  async saveCookies(platform: string): Promise<void> {
    const ctx = this.contexts.get(platform);
    if (!ctx) {
      logger.warn(
        { platform },
        'saveCookies called but no context exists for platform',
      );
      return;
    }

    const state = await ctx.storageState();
    // Playwright's storageState() return type is structurally compatible with
    // our CookieStore's StorageState interface.
    await cookieStore.save(platform, state);
    logger.debug({ platform }, 'Cookies saved');
  }

  /**
   * Wait for every platform queue that exists at call time to settle.
   * Useful during graceful shutdown so that running operations finish before
   * teardown. Operations enqueued after this call starts are not awaited.
   */
  async drain(): Promise<void> {
    const pending = [...this.queues.values()];
    if (pending.length === 0) return;

    logger.info(
      { queueCount: pending.length },
      'Draining browser operation queues',
    );

    await Promise.allSettled(pending);

    logger.info('All browser operation queues drained');
  }

  /**
   * Close all browser contexts and the browser itself, then reset internal
   * state. Safe to call multiple times. Close failures are logged, not thrown.
   */
  async close(): Promise<void> {
    // Close every context individually so callers that need to flush
    // storageState can do so before calling close().
    const contextClosePromises = [...this.contexts.values()].map((ctx) =>
      ctx.close().catch((err: unknown) => {
        logger.warn({ err }, 'Error closing browser context during shutdown');
      }),
    );
    await Promise.all(contextClosePromises);

    // Detach the browser before closing it so its 'disconnected' handler can
    // tell this intentional shutdown apart from a crash.
    const browser = this.browser;
    this.browser = null;
    if (browser) {
      await browser.close().catch((err: unknown) => {
        logger.warn({ err }, 'Error closing browser during shutdown');
      });
    }

    this.contexts.clear();
    this.queues.clear();
    this.queueDepths.clear();
    this.launchPromise = null;

    logger.info('BrowserManager closed');
  }

  // -- Private helpers ------------------------------------------------------

  /**
   * Ensure the browser is launched and connected. Uses a launch mutex so
   * that concurrent callers share a single launch attempt instead of
   * spawning multiple browser processes. The mutex is released as soon as
   * the launch settles, so a failed launch is retried by the next caller.
   */
  private async ensureBrowser(): Promise<Browser> {
    if (this.browser?.isConnected()) {
      return this.browser;
    }

    // If another caller is already launching, piggy-back on that promise.
    if (this.launchPromise) {
      return this.launchPromise;
    }

    this.launchPromise = this.launchBrowser();

    try {
      return await this.launchPromise;
    } finally {
      this.launchPromise = null;
    }
  }

  /**
   * Launch a Chromium instance via rebrowser-playwright and register it as
   * the managed browser.
   */
  private async launchBrowser(): Promise<Browser> {
    logger.info(
      { headless: config.headless, browserBin: config.browserBin ?? 'default' },
      'Launching browser',
    );

    const browser = await chromium.launch({
      headless: config.headless,
      ...(config.browserBin ? { executablePath: config.browserBin } : {}),
    });

    // React to unexpected disconnects (e.g. browser crash, OOM kill).
    browser.on('disconnected', () => {
      // Ignore the event when this instance is no longer the managed one:
      // close() detaches the browser before closing it, and a stale instance
      // must never wipe the state of its replacement.
      if (this.browser !== browser) {
        logger.debug('Detached browser instance disconnected');
        return;
      }

      logger.error('Browser disconnected unexpectedly');
      this.browser = null;
      this.contexts.clear();
      // launchPromise needs no reset here: ensureBrowser() clears it as soon
      // as a launch settles, so the next caller starts a fresh launch.
    });

    this.browser = browser;

    logger.info('Browser launched successfully');

    return browser;
  }

  /**
   * Get (or lazily create) a BrowserContext for the given platform.
   *
   * On first creation we attempt to restore cookies from the CookieStore
   * so that sessions survive process restarts. A cookie load failure is
   * logged and a fresh context is created instead.
   *
   * Creation is asynchronous, so two callers (e.g. `acquirePage` and a queued
   * `withPage` task) can race for the same platform. The first context to be
   * registered wins; the loser closes its duplicate and returns the winner so
   * that every page of a platform shares one context.
   *
   * @throws Error when no browser is launched, or when the managed browser
   *   was closed or replaced while the context was being created.
   */
  private async getContext(platform: string): Promise<BrowserContext> {
    const existing = this.contexts.get(platform);
    if (existing) return existing;

    // Capture the browser now: `this.browser` may be nulled while we await.
    const browser = this.browser;
    if (!browser) {
      throw new Error('Cannot create context: browser is not launched');
    }

    // Attempt to restore a previous session's storage state from disk.
    let storageState: Awaited<ReturnType<BrowserContext['storageState']>> | undefined;
    try {
      const loaded = await cookieStore.load(platform);
      if (loaded) {
        storageState = loaded;
        logger.debug(
          { platform, cookieCount: loaded.cookies.length },
          'Restoring saved cookies into new context',
        );
      }
    } catch (err: unknown) {
      // Cookie load failure should never prevent context creation.
      logger.warn(
        { err, platform },
        'Failed to load saved cookies -- creating fresh context',
      );
    }

    const ctx = await browser.newContext(
      storageState ? { storageState } : {},
    );

    const discard = async (): Promise<void> => {
      await ctx.close().catch((err: unknown) => {
        logger.warn({ err, platform }, 'Failed to close redundant browser context');
      });
    };

    // The managed browser changed underneath us: registering this context
    // would leave a dead entry in the map for every later operation.
    if (this.browser !== browser) {
      await discard();
      throw new Error('Browser was closed or replaced while creating context');
    }

    // A concurrent caller registered a context first: share theirs.
    const winner = this.contexts.get(platform);
    if (winner) {
      await discard();
      return winner;
    }

    this.contexts.set(platform, ctx);

    logger.debug({ platform }, 'Browser context created');

    return ctx;
  }

  /**
   * Create the timeout arm used in `Promise.race` inside `withPage`.
   *
   * @param ms - Delay before the promise rejects.
   * @param platform - Platform name, included in the error message.
   * @returns `promise`, which rejects after `ms` milliseconds unless
   *   cancelled, and `cancel`, which clears the timer (the promise then never
   *   settles). The timer is `unref()`'d so it cannot keep the Node.js event
   *   loop alive during graceful shutdown.
   */
  private createTimeout<T>(
    ms: number,
    platform: string,
  ): { promise: Promise<T>; cancel: () => void } {
    let timer: ReturnType<typeof setTimeout> | undefined;

    const promise = new Promise<T>((_resolve, reject) => {
      const handle = setTimeout(() => {
        reject(
          new Error(
            `Operation timed out after ${ms}ms for platform "${platform}"`,
          ),
        );
      }, ms);

      // Prevent the timeout from keeping the process alive during shutdown.
      if (typeof handle === 'object' && 'unref' in handle) {
        handle.unref();
      }

      timer = handle;
    });

    const cancel = (): void => {
      if (timer !== undefined) clearTimeout(timer);
    };

    return { promise, cancel };
  }
}

// ---------------------------------------------------------------------------
// Singleton export
// ---------------------------------------------------------------------------

export const browserManager = new BrowserManager();
|
||||
@@ -0,0 +1,112 @@
|
||||
import os from 'node:os';
|
||||
import path from 'node:path';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Read a string environment variable.
 *
 * @param key - Name of the environment variable.
 * @param fallback - Value used when the variable is not set.
 * @returns The variable's value (an empty string counts as set), otherwise
 *   `fallback`.
 */
function envString(key: string, fallback: string): string {
  const value = process.env[key];
  if (value === undefined || value === null) {
    return fallback;
  }
  return value;
}
|
||||
|
||||
/**
 * Read an integer environment variable.
 *
 * The value must consist solely of an optionally signed run of decimal
 * digits (surrounding whitespace is ignored). Anything else, including
 * partially numeric input such as `"12abc"`, `"3.9"` or `"1e3"`, is reported
 * on stderr and replaced by `fallback` rather than being silently truncated.
 *
 * @param key - Name of the environment variable.
 * @param fallback - Value used when the variable is unset or invalid.
 * @returns The parsed integer, or `fallback`.
 */
function envInt(key: string, fallback: number): number {
  const raw = process.env[key];
  if (raw === undefined) return fallback;

  const trimmed = raw.trim();
  const parsed = /^[+-]?\d+$/.test(trimmed) ? Number(trimmed) : Number.NaN;

  if (!Number.isSafeInteger(parsed)) {
    // eslint-disable-next-line no-console
    console.error(`[config] Invalid integer for ${key}="${raw}", using default ${fallback}`);
    return fallback;
  }

  return parsed;
}
|
||||
|
||||
/**
 * Read a boolean environment variable.
 *
 * Accepts `true` / `1` / `yes` and `false` / `0` / `no`, case-insensitively
 * and ignoring surrounding whitespace. Any other non-empty value is reported
 * on stderr (mirroring `envInt`) and replaced by `fallback`.
 *
 * @param key - Name of the environment variable.
 * @param fallback - Value used when the variable is unset or unrecognised.
 * @returns The parsed boolean, or `fallback`.
 */
function envBool(key: string, fallback: boolean): boolean {
  const raw = process.env[key];
  if (raw === undefined) return fallback;

  const normalized = raw.trim().toLowerCase();

  // Accept common truthy / falsy strings
  if (['true', '1', 'yes'].includes(normalized)) return true;
  if (['false', '0', 'no'].includes(normalized)) return false;

  // An empty value is treated like "unset"; anything else is a likely typo
  // worth surfacing instead of silently applying the default.
  if (normalized !== '') {
    // eslint-disable-next-line no-console
    console.error(`[config] Invalid boolean for ${key}="${raw}", using default ${fallback}`);
  }
  return fallback;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// HOST safety check — must run before exporting config
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Bind address; defaults to the IPv4 loopback interface.
const host = envString('HOST', '127.0.0.1');

// A wildcard bind address is only permitted when the operator has opted in
// explicitly through ALLOW_REMOTE.
const bindsAllInterfaces = host === '0.0.0.0' || host === '::';
const remoteAcknowledged = process.env['ALLOW_REMOTE'] === 'yes-i-understand-the-risk';

if (bindsAllInterfaces && !remoteAcknowledged) {
  // The logger module depends on config and is therefore not available
  // yet, so the message goes straight to console.error.
  // eslint-disable-next-line no-console
  console.error(
    `[FATAL] HOST is set to "${host}" which exposes the service to the network.\n` +
      `If you really intend to do this, set ALLOW_REMOTE=yes-i-understand-the-risk\n` +
      `Refusing to start.`,
  );
  process.exit(1);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Operation timeouts (milliseconds)
|
||||
// Matches the tiers described in PLAN.md section 6.1
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Timeout budget per operation type, in milliseconds. */
const operationTimeouts: Record<string, number> = {
  // Quick interactions
  like: 15 * 1_000,
  favorite: 15 * 1_000,
  comment: 20 * 1_000,
  reply: 20 * 1_000,

  // Page load + extraction
  feed_list: 30 * 1_000,
  search: 30 * 1_000,

  // Includes scroll loading
  feed_detail: 60 * 1_000,
  user_profile: 60 * 1_000,

  // Slow uploads and interactive user flows (5 minutes)
  publish: 5 * 60 * 1_000,
  login: 5 * 60 * 1_000,

  // Fallback for operation types without a dedicated entry (1 minute)
  default: 60 * 1_000,
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Config type
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Shape of the application configuration singleton exported as `config`. */
export interface AppConfig {
  /**
   * TCP port the HTTP server listens on.
   */
  port: number;

  /**
   * Address the HTTP server binds to.
   */
  host: string;

  /**
   * Whether the browser is launched in headless mode.
   */
  headless: boolean;

  /**
   * Path to a custom browser executable; `undefined` when none is configured.
   */
  browserBin: string | undefined;

  /**
   * Pino log level.
   */
  logLevel: string;

  /**
   * Value of `NODE_ENV`.
   */
  nodeEnv: string;

  /**
   * Directory under which per-platform cookie files are stored.
   */
  cookieDir: string;

  /**
   * Maximum number of pending operations allowed in one platform queue.
   */
  maxQueueDepth: number;

  /**
   * Timeout in milliseconds keyed by operation type.
   */
  operationTimeouts: Record<string, number>;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Exported config singleton
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Default cookie location: a `.social-mcp` directory in the user's home.
const defaultCookieDir = path.join(os.homedir(), '.social-mcp');

// An empty BROWSER_BIN is treated the same as an unset one.
const browserBinOverride = process.env['BROWSER_BIN'];

/** Application configuration, resolved once from the environment at import time. */
export const config: AppConfig = {
  port: envInt('PORT', 3000),
  host,
  headless: envBool('HEADLESS', true),
  browserBin: browserBinOverride ? browserBinOverride : undefined,
  logLevel: envString('LOG_LEVEL', 'info'),
  nodeEnv: envString('NODE_ENV', 'development'),
  cookieDir: envString('COOKIE_DIR', defaultCookieDir),
  maxQueueDepth: envInt('MAX_QUEUE_DEPTH', 10),
  operationTimeouts,
};
|
||||
@@ -0,0 +1,171 @@
|
||||
import fs from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
|
||||
import { config } from '../config/index.js';
|
||||
import { logger } from '../utils/logger.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types — mirrors Playwright's BrowserContext.storageState() shape
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** A single browser cookie as it appears in a persisted storage state. */
export interface Cookie {
  /** Cookie name. */
  name: string;
  /** Cookie value. */
  value: string;
  /** Domain the cookie belongs to. */
  domain: string;
  /** URL path the cookie belongs to. */
  path: string;
  /** Expiry as a number (units as produced by the browser — TODO confirm). */
  expires: number;
  /** Whether the cookie carries the HttpOnly attribute. */
  httpOnly: boolean;
  /** Whether the cookie carries the Secure attribute. */
  secure: boolean;
  /** SameSite attribute. */
  sameSite: 'Strict' | 'Lax' | 'None';
}

/** One origin's persisted localStorage entries. */
type OriginStorage = {
  origin: string;
  localStorage: Array<{ name: string; value: string }>;
};

/** Persisted browser state: all cookies plus localStorage grouped by origin. */
export interface StorageState {
  cookies: Cookie[];
  origins: Array<OriginStorage>;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// CookieStore
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const log = logger.child({ module: 'cookie-store' });

/**
 * Persists one storage-state file per platform at
 * `<config.cookieDir>/<platform>/cookies.json`.
 */
export class CookieStore {
  /**
   * Return the path to the cookies.json for a given platform, built from
   * `config.cookieDir`.
   */
  getPath(platform: string): string {
    return path.join(config.cookieDir, platform, 'cookies.json');
  }

  /**
   * Load the persisted storage state for a platform.
   *
   * @returns The stored state, or `null` when no cookie file exists yet or
   *   the file does not have the expected top-level shape.
   * @throws Any other read or JSON parse error, after logging it.
   */
  async load(platform: string): Promise<StorageState | null> {
    const filePath = this.getPath(platform);

    try {
      const raw = await fs.readFile(filePath, 'utf-8');
      const parsed: unknown = JSON.parse(raw);

      // Minimal structural validation so we don't blindly trust disk data.
      if (!isStorageState(parsed)) {
        log.warn({ platform, filePath }, 'Cookie file failed validation, treating as absent');
        return null;
      }

      log.debug({ platform, cookieCount: parsed.cookies.length }, 'Loaded cookies from disk');
      return parsed;
    } catch (err: unknown) {
      if (isNodeError(err) && err.code === 'ENOENT') {
        log.debug({ platform }, 'No cookie file found');
        return null;
      }
      log.error({ err, platform, filePath }, 'Failed to load cookie file');
      throw err;
    }
  }

  /**
   * Persist a storage state for a platform using an atomic write.
   *
   * Strategy:
   *  1. Ensure the platform directory exists (created with mode 0o700).
   *  2. Create a temporary file (`.tmp.<pid>.<timestamp>.<random>`) inside
   *     the same directory with mode 0o600 and write the JSON to it.
   *  3. Atomically rename the temp file to the final path.
   *
   * The temp name is unique per call, not just per process, so overlapping
   * saves for the same platform cannot write into (or rename away) each
   * other's temp file. It is opened with the exclusive `wx` flag so the file
   * is always freshly created and the 0o600 mode is always applied.
   *
   * Because rename is atomic on the same filesystem, readers will never
   * observe a partially-written cookies.json.
   *
   * @throws The underlying filesystem error, after logging it and making a
   *   best-effort attempt to remove the temp file.
   */
  async save(platform: string, state: StorageState): Promise<void> {
    const filePath = this.getPath(platform);
    const dir = path.dirname(filePath);
    const uniqueSuffix = `${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2)}`;
    const tmpPath = path.join(dir, `.tmp.${uniqueSuffix}`);

    try {
      // Ensure directory exists with restricted permissions.
      await fs.mkdir(dir, { recursive: true, mode: 0o700 });

      const json = JSON.stringify(state, null, 2);

      // Write to a brand-new temp file, then atomically rename.
      await fs.writeFile(tmpPath, json, { encoding: 'utf-8', mode: 0o600, flag: 'wx' });
      await fs.rename(tmpPath, filePath);

      log.debug(
        { platform, cookieCount: state.cookies.length },
        'Saved cookies to disk',
      );
    } catch (err: unknown) {
      log.error({ err, platform, filePath }, 'Failed to save cookie file');

      // Best-effort cleanup of the temp file.
      try {
        await fs.unlink(tmpPath);
      } catch {
        // Ignore — the temp file may not have been created.
      }

      throw err;
    }
  }

  /**
   * Delete the cookie file for a platform.
   * Silently succeeds when no file exists.
   *
   * @throws Any other filesystem error, after logging it.
   */
  async delete(platform: string): Promise<void> {
    const filePath = this.getPath(platform);

    try {
      await fs.unlink(filePath);
      log.debug({ platform }, 'Deleted cookie file');
    } catch (err: unknown) {
      if (isNodeError(err) && err.code === 'ENOENT') {
        log.debug({ platform }, 'Cookie file already absent, nothing to delete');
        return;
      }
      log.error({ err, platform, filePath }, 'Failed to delete cookie file');
      throw err;
    }
  }
}

// ---------------------------------------------------------------------------
// Singleton
// ---------------------------------------------------------------------------

export const cookieStore = new CookieStore();
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** An `Error` that may carry a string `code` such as `'ENOENT'`. */
interface NodeError extends Error {
  code?: string;
}

/**
 * Narrow an unknown thrown value to `NodeError`.
 *
 * Only checks that the value is an `Error` instance; `code` is optional on
 * the narrowed type, so callers still have to compare it themselves.
 */
function isNodeError(err: unknown): err is NodeError {
  if (err instanceof Error) {
    return true;
  }
  return false;
}
|
||||
|
||||
/**
 * Runtime guard for data parsed from a cookie file.
 *
 * Deliberately shallow: the value must be a non-null object whose `cookies`
 * and `origins` properties are both arrays. Element contents and any extra
 * properties are not inspected, so forward-compatible fields are not rejected.
 */
function isStorageState(value: unknown): value is StorageState {
  if (value === null || typeof value !== 'object') {
    return false;
  }

  const candidate = value as Record<string, unknown>;
  const requiredArrays = ['cookies', 'origins'];

  return requiredArrays.every((key) => Array.isArray(candidate[key]));
}
|
||||
@@ -0,0 +1,89 @@
|
||||
import { logger } from './utils/logger.js';
|
||||
import { browserManager } from './browser/manager.js';
|
||||
import { AppServer } from './server/app.js';
|
||||
import { xiaohongshuPlugin } from './platforms/xiaohongshu/index.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Bootstrap
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const appServer = new AppServer();

// -- Platform plugins -------------------------------------------------------
appServer.registerPlugin(xiaohongshuPlugin);

// -- Start ------------------------------------------------------------------

/** Log a start-up failure and terminate the process with exit code 1. */
const abortStartup = (err: unknown): void => {
  logger.fatal({ err }, 'Failed to start server');
  process.exit(1);
};

appServer.start().catch(abortStartup);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Graceful shutdown
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Set once shutdown has begun so repeated triggers are ignored. */
let shuttingDown = false;

/** Shutdown triggers that represent a crash rather than a requested stop. */
const FATAL_SHUTDOWN_TRIGGERS: ReadonlySet<string> = new Set([
  'uncaughtException',
  'unhandledRejection',
]);

/**
 * Shut the process down in order: drain browser queues (at most 30 s), close
 * the browser, close the HTTP server, flush the logger, then exit.
 *
 * The exit code is 0 for an ordinary signal and 1 when the shutdown was
 * triggered by an uncaught exception or unhandled rejection, so supervisors
 * do not mistake a crash for a clean stop. If any step throws, or the whole
 * sequence exceeds 45 s, the process exits with code 1.
 *
 * @param signal - Name of the signal or event that triggered the shutdown.
 */
async function gracefulShutdown(signal: string): Promise<void> {
  if (shuttingDown) return;
  shuttingDown = true;

  const exitCode = FATAL_SHUTDOWN_TRIGGERS.has(signal) ? 1 : 0;

  logger.info({ signal }, 'Received shutdown signal — starting graceful shutdown');

  // Safety net: if graceful shutdown takes too long, force exit.
  const forceExitTimer = setTimeout(() => {
    logger.fatal('Graceful shutdown timed out after 45s — forcing exit');
    process.exit(1);
  }, 45_000);

  // Prevent the safety-net timer from keeping the process alive on its own.
  if (typeof forceExitTimer === 'object' && 'unref' in forceExitTimer) {
    forceExitTimer.unref();
  }

  try {
    // Step 1: Drain browser queues so in-flight operations finish (max 30s).
    logger.info('Shutdown step 1/5: draining browser queues');
    await Promise.race([
      browserManager.drain(),
      new Promise<void>((resolve) => setTimeout(resolve, 30_000).unref()),
    ]);

    // Step 2: Close the browser and all contexts.
    logger.info('Shutdown step 2/5: closing browser');
    await browserManager.close();

    // Step 3: Close the HTTP server (stop accepting new connections).
    logger.info('Shutdown step 3/5: closing HTTP server');
    await appServer.close();

    // Step 4: Flush structured logs so nothing is lost.
    logger.info('Shutdown step 4/5: flushing logger');
    logger.flush();

    // Step 5: Exit, reporting failure when the trigger was a crash.
    logger.info('Shutdown step 5/5: exiting');
    process.exit(exitCode);
  } catch (err: unknown) {
    logger.fatal({ err }, 'Error during graceful shutdown');
    process.exit(1);
  }
}
|
||||
|
||||
// Both termination signals start the same graceful shutdown sequence.
for (const signal of ['SIGINT', 'SIGTERM'] as const) {
  process.on(signal, () => void gracefulShutdown(signal));
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Global error handlers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Log an unhandled promise rejection, then begin graceful shutdown. */
function onUnhandledRejection(reason: unknown): void {
  logger.fatal({ err: reason }, 'Unhandled promise rejection');
  void gracefulShutdown('unhandledRejection');
}

/** Log an uncaught exception, then begin graceful shutdown. */
function onUncaughtException(err: Error): void {
  logger.fatal({ err }, 'Uncaught exception');
  void gracefulShutdown('uncaughtException');
}

process.on('unhandledRejection', onUnhandledRejection);
process.on('uncaughtException', onUncaughtException);
|
||||
@@ -0,0 +1,322 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import { XHS_SELECTORS } from './selectors.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** URL of the Xiaohongshu explore page. */
const FEED_DETAIL_URL = 'https://www.xiaohongshu.com/explore';

/** Pause, in milliseconds, between typing the comment text and submitting it. */
const TYPE_SETTLE_MS = 500;

/** Pause, in milliseconds, between clicking submit and checking the result. */
const SUBMIT_SETTLE_MS = 2000;

// Selector groups used by this module.
const { comment: selComment, feedDetail: selDetail } = XHS_SELECTORS;

const log = logger.child({ module: 'xhs-comment' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// postComment
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Post a top-level comment on a Xiaohongshu note.
 *
 * Empty or whitespace-only content is rejected up front: nothing would be
 * typed, and the final verification (a substring search for the start of
 * `content`) would trivially succeed on an empty string.
 *
 * @param page - Playwright Page managed by BrowserManager.
 * @param feedId - The note / feed ID to comment on.
 * @param xsecToken - Security token for accessing the feed page.
 * @param content - Comment text to post.
 * @returns `{ success: true }` when the first 20 characters of `content` are
 *   found in the page HTML after submitting. `{ success: false }` when the
 *   content is empty, the comment input or submit step cannot be completed,
 *   or the text is not found afterwards.
 */
export async function postComment(
  page: Page,
  feedId: string,
  xsecToken: string,
  content: string,
): Promise<{ success: boolean }> {
  if (content.trim().length === 0) {
    log.warn({ feedId }, 'Refusing to post an empty comment');
    return { success: false };
  }

  log.info({ feedId }, 'Posting comment on note');

  // -------------------------------------------------------------------------
  // 1. Navigate to the feed detail page
  // -------------------------------------------------------------------------

  const feedUrl = buildFeedUrl(feedId, xsecToken);
  await page.goto(feedUrl, { waitUntil: 'domcontentloaded' });

  // Wait for the note container to appear, then give the page a moment to settle.
  await page.waitForSelector(selDetail.noteContainer, { timeout: 10_000 });
  await page.waitForTimeout(1_000);

  // -------------------------------------------------------------------------
  // 2. Find and focus the comment input
  // -------------------------------------------------------------------------

  const commentInput = await findCommentInput(page);

  if (!commentInput) {
    log.warn('Comment input not found on feed detail page');
    return { success: false };
  }

  // -------------------------------------------------------------------------
  // 3. Type the comment content
  // -------------------------------------------------------------------------

  await commentInput.click();
  await page.waitForTimeout(300);
  await page.keyboard.type(content, { delay: 30 });
  await page.waitForTimeout(TYPE_SETTLE_MS);

  // -------------------------------------------------------------------------
  // 4. Submit the comment
  // -------------------------------------------------------------------------

  const submitted = await submitComment(page);

  if (!submitted) {
    log.warn('Failed to submit comment — submit button not found or click failed');
    return { success: false };
  }

  // -------------------------------------------------------------------------
  // 5. Verify the comment was posted
  // -------------------------------------------------------------------------

  await page.waitForTimeout(SUBMIT_SETTLE_MS);

  // Check for the comment text in the page to verify success.
  const pageContent = await page.content();
  const success = pageContent.includes(content.slice(0, 20));

  log.info({ feedId, success }, 'Comment post complete');

  return { success };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// replyComment
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Reply to an existing comment on a Xiaohongshu note.
 *
 * Steps: open the note page, activate a reply box, type the text, submit, and
 * finally look for the text in the page HTML as a best-effort confirmation.
 *
 * Reply-box selection:
 * - `commentId` given: only that comment's reply button is tried. If it cannot
 *   be found we do NOT reply to some other comment (that would address the
 *   wrong person); we fall through to the main comment input instead.
 * - `commentId` omitted: the first comment's reply button is used.
 * - No reply button activated: the main comment input is used and the text is
 *   prefixed with `@userId` when `userId` is provided.
 *
 * @param page - Playwright Page managed by BrowserManager.
 * @param feedId - The note / feed ID.
 * @param xsecToken - Security token for accessing the feed page.
 * @param content - Reply text to post.
 * @param commentId - Optional ID of the comment to reply to (for targeting).
 * @param userId - Optional user ID of the comment author (for @ mention).
 * @returns `{ success: true }` when the first 20 characters of `content` are
 *   present in the page HTML after submitting; `{ success: false }` when no
 *   input was found, submission failed, or the text could not be found.
 */
export async function replyComment(
  page: Page,
  feedId: string,
  xsecToken: string,
  content: string,
  commentId?: string,
  userId?: string,
): Promise<{ success: boolean }> {
  log.info({ feedId, commentId, userId }, 'Replying to comment on note');

  // -------------------------------------------------------------------------
  // 1. Navigate to the feed detail page
  // -------------------------------------------------------------------------

  const feedUrl = buildFeedUrl(feedId, xsecToken);
  await page.goto(feedUrl, { waitUntil: 'domcontentloaded' });

  await page.waitForSelector(selDetail.noteContainer, { timeout: 10_000 });
  await page.waitForTimeout(1_000);

  // -------------------------------------------------------------------------
  // 2. Activate a reply box
  // -------------------------------------------------------------------------

  let replyInputFound = false;

  if (commentId) {
    // A specific target was requested: never substitute a different comment.
    replyInputFound = await clickReplyOnComment(page, commentId);
  } else {
    // No target requested: use the first comment's reply button.
    const [firstComment] = await page.$$(selComment.commentItem);

    if (firstComment) {
      // Hover to reveal the reply button (some UIs show it on hover).
      await firstComment.hover();
      await page.waitForTimeout(300);

      const replyBtn = await firstComment.$(selComment.commentReplyButton);
      if (replyBtn) {
        await replyBtn.click();
        await page.waitForTimeout(500);
        replyInputFound = true;
      }
    }
  }

  if (!replyInputFound) {
    log.debug('Reply button not found, falling back to main comment input');
  }

  // -------------------------------------------------------------------------
  // 3. Type the reply content
  // -------------------------------------------------------------------------

  // Prefer the reply-specific input, then the main comment input.
  const replyInput =
    (await page.$(selComment.replyInput)) ??
    (await findCommentInput(page));

  if (!replyInput) {
    log.warn('Reply input not found');
    return { success: false };
  }

  await replyInput.click();
  await page.waitForTimeout(300);

  // Without an activated reply box the only way to address the author is an
  // explicit @ mention.
  const textToType =
    !replyInputFound && userId ? `@${userId} ${content}` : content;

  await page.keyboard.type(textToType, { delay: 30 });
  await page.waitForTimeout(TYPE_SETTLE_MS);

  // -------------------------------------------------------------------------
  // 4. Submit the reply
  // -------------------------------------------------------------------------

  const submitted = await submitComment(page);

  if (!submitted) {
    log.warn('Failed to submit reply');
    return { success: false };
  }

  // -------------------------------------------------------------------------
  // 5. Verify
  // -------------------------------------------------------------------------

  await page.waitForTimeout(SUBMIT_SETTLE_MS);

  // Best-effort check: the start of the reply text should now be in the HTML.
  const pageContent = await page.content();
  const success = pageContent.includes(content.slice(0, 20));

  log.info({ feedId, commentId, success }, 'Reply post complete');

  return { success };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Build the URL for a feed detail page.
 *
 * Both `feedId` and `xsecToken` are percent-encoded so that a value containing
 * `/`, `?`, `#` or `&` cannot alter the path or query of the resulting URL.
 *
 * @param feedId - The note / feed ID placed in the URL path.
 * @param xsecToken - Security token passed as the `xsec_token` query parameter.
 * @returns The detail page URL with `xsec_source=pc_search`.
 */
function buildFeedUrl(feedId: string, xsecToken: string): string {
  const encodedId = encodeURIComponent(feedId);
  const encodedToken = encodeURIComponent(xsecToken);
  return `${FEED_DETAIL_URL}/${encodedId}?xsec_token=${encodedToken}&xsec_source=pc_search`;
}
|
||||
|
||||
/**
 * Locate the main comment input element.
 *
 * The primary selector is tried first, then the alternative contenteditable
 * selector. If neither matches, the comment area is clicked (some UIs only
 * render the input after that) and both selectors are tried once more.
 *
 * @param page - The Playwright page showing a note detail view.
 * @returns The input's element handle, or `null` when none could be found.
 */
async function findCommentInput(page: Page) {
  // Primary textarea first, then the contenteditable variant.
  const lookup = async () =>
    (await page.$(selComment.commentInput)) ??
    (await page.$(selComment.commentInputAlt));

  const immediate = await lookup();
  if (immediate) return immediate;

  // Last resort: click inside the comment zone to make the input appear.
  const commentArea = await page.$('.comment-area, .comments-container');
  if (!commentArea) return null;

  await commentArea.click();
  await page.waitForTimeout(500);

  return lookup();
}
|
||||
|
||||
/**
 * Find a specific comment by its ID and click its reply button.
 *
 * The comment is looked up through several attribute forms, in order:
 * `id="comment-<id>"`, `data-comment-id="<id>"`, `id="<id>"`, `data-id="<id>"`.
 * The last two cover IDs that were scraped verbatim from an element's `id` or
 * `data-id` attribute. The ID is escaped before being placed in the selector
 * so that quotes or backslashes in it cannot break or alter the query.
 *
 * @param page - The Playwright page showing a note detail view.
 * @param commentId - ID of the comment whose reply button should be clicked.
 * @returns `true` when the reply button was found and clicked, otherwise
 *   `false` (empty ID, comment not found, or no reply button inside it).
 */
async function clickReplyOnComment(
  page: Page,
  commentId: string,
): Promise<boolean> {
  if (!commentId) return false;

  // Escape characters that would terminate or corrupt a double-quoted CSS
  // attribute value.
  const safeId = commentId.replace(/["\\]/g, '\\$&');
  const candidateSelectors = [
    `[id="comment-${safeId}"]`,
    `[data-comment-id="${safeId}"]`,
    `[id="${safeId}"]`,
    `[data-id="${safeId}"]`,
  ];

  let commentEl: Awaited<ReturnType<Page['$']>> = null;
  for (const selector of candidateSelectors) {
    commentEl = await page.$(selector);
    if (commentEl) break;
  }

  if (!commentEl) {
    log.debug({ commentId }, 'Target comment element not found by ID');
    return false;
  }

  // Hover to reveal the reply button.
  await commentEl.hover();
  await page.waitForTimeout(300);

  const replyBtn = await commentEl.$(selComment.commentReplyButton);
  if (!replyBtn) {
    log.debug({ commentId }, 'Reply button not found on target comment');
    return false;
  }

  await replyBtn.click();
  // Give the reply box time to render.
  await page.waitForTimeout(500);

  return true;
}
|
||||
|
||||
/**
 * Submit the comment currently typed into the active input.
 *
 * - Submit button missing: presses `Control+Enter` and reports success, since
 *   some UIs submit on that shortcut and there is nothing further to check.
 * - Submit button disabled: waits one second, re-queries it, and gives up if
 *   it is gone or still disabled.
 * - Click failure: reported as `false` rather than thrown, so callers can log
 *   the failure and return a result.
 *
 * @param page - The Playwright page containing the comment form.
 * @returns `true` when a submit action was performed, `false` otherwise.
 */
async function submitComment(page: Page): Promise<boolean> {
  let submitBtn = await page.$(selComment.commentSubmitButton);

  if (!submitBtn) {
    // Some UIs submit on Ctrl+Enter / Cmd+Enter.
    log.debug('Submit button not found, trying keyboard shortcut');
    await page.keyboard.press('Control+Enter');
    return true;
  }

  // A failing state query is treated as "enabled" so we still attempt a click.
  const isDisabled = await submitBtn.isDisabled().catch(() => false);
  if (isDisabled) {
    log.debug('Submit button is disabled, waiting briefly');
    await page.waitForTimeout(1_000);

    // Re-query in case the button was re-rendered or became enabled.
    submitBtn = await page.$(selComment.commentSubmitButton);
    if (!submitBtn) return false;

    // Clicking a disabled button would only block until the click times out.
    const stillDisabled = await submitBtn.isDisabled().catch(() => false);
    if (stillDisabled) {
      log.debug('Submit button still disabled after waiting');
      return false;
    }
  }

  try {
    await submitBtn.click();
  } catch (err: unknown) {
    log.debug({ err }, 'Submit button click failed');
    return false;
  }

  return true;
}
|
||||
@@ -0,0 +1,724 @@
|
||||
import type { Page, ElementHandle } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import { XHS_SELECTORS } from './selectors.js';
|
||||
import { extractInitialState, parseCountString, ensureHttps } from './feeds.js';
|
||||
import type { FeedDetail, Comment } from './types.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Base URL of note detail pages; the feed ID is appended as a path segment. */
const FEED_DETAIL_BASE_URL = 'https://www.xiaohongshu.com/explore';

/** Upper bound on "show more" clicks when loading additional comments. */
const MAX_LOAD_MORE_CLICKS = 20;

/** Pause (ms) after each "show more" click so the page can render. */
const LOAD_MORE_DELAY_MS = 1_500;

/** Selector set for the feed detail page. */
const SEL = XHS_SELECTORS.feedDetail;

/** Logger scoped to this module. */
const log = logger.child({ module: 'xhs-feed-detail' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// __INITIAL_STATE__ raw types for feed detail
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Partial shape of `__INITIAL_STATE__` on a note detail page.
 *
 * Only the locations probed for note data are declared; any other top-level
 * key is allowed and typed as `unknown`.
 */
interface RawDetailState {
  /** Variant where the note sits under a top-level `noteData` key. */
  noteData?: {
    data?: { noteData?: RawNoteData };
    noteData?: RawNoteData;
  };
  /** Variant where the note sits under a top-level `note` key. */
  note?: {
    /** Notes keyed by feed ID. */
    noteDetailMap?: Record<string, { note?: RawNoteData }>;
    note?: RawNoteData;
    noteData?: RawNoteData;
  };
  [key: string]: unknown;
}
|
||||
|
||||
/**
 * Raw note object as found in `__INITIAL_STATE__`.
 *
 * Most fields are declared in both camelCase and snake_case because either
 * spelling is read when parsing; everything is optional.
 */
interface RawNoteData {
  // Identity
  noteId?: string;
  id?: string;
  xsecToken?: string;
  xsec_token?: string;

  // Text content
  title?: string;
  desc?: string;
  description?: string;
  type?: string;

  // Media
  imageList?: RawNoteImage[];
  image_list?: RawNoteImage[];
  video?: RawNoteVideo;

  // Tags and interaction counters
  tagList?: RawNoteTag[];
  tag_list?: RawNoteTag[];
  interactInfo?: RawNoteInteract;
  interact_info?: RawNoteInteract;

  // Timestamps
  time?: number;
  createTime?: number;
  create_time?: number;
  lastUpdateTime?: number;
  last_update_time?: number;

  // Location, author, comments
  ipLocation?: string;
  ip_location?: string;
  user?: RawNoteUser;
  comments?: RawCommentData[];
}
|
||||
|
||||
/** Raw image entry of a note; any one of the URL fields may be populated. */
interface RawNoteImage {
  url?: string;
  // Preview / default renditions (camelCase and snake_case spellings).
  urlPre?: string;
  url_pre?: string;
  urlDefault?: string;
  url_default?: string;
  // Lists of alternative renditions.
  infoList?: { url?: string }[];
  info_list?: { url?: string }[];
}
|
||||
|
||||
/** Raw video descriptor of a note, covering the URL locations that are read. */
interface RawNoteVideo {
  /** Direct video URL, when present. */
  url?: string;
  consumer?: {
    originVideoKey?: string;
    origin_video_key?: string;
  };
  media?: {
    stream?: {
      /** H.264 stream variants, each with a master URL. */
      h264?: { masterUrl?: string; master_url?: string }[];
    };
    video?: { url?: string };
  };
}
|
||||
|
||||
/** Raw tag entry attached to a note. */
interface RawNoteTag {
  id?: string;
  /** Tag text; the only field read when building the tag list. */
  name?: string;
  type?: string;
}
|
||||
|
||||
/**
 * Raw interaction counters of a note. Values are strings and are converted
 * to numbers with `parseCountString` by the parser.
 */
interface RawNoteInteract {
  // Likes
  likedCount?: string;
  liked_count?: string;
  // Collects
  collectedCount?: string;
  collected_count?: string;
  // Comments
  commentCount?: string;
  comment_count?: string;
  // Shares
  shareCount?: string;
  share_count?: string;
}
|
||||
|
||||
/** Raw author object of a note, with the alternative spellings that are read. */
interface RawNoteUser {
  // User ID
  userId?: string;
  user_id?: string;
  // Display name
  nickname?: string;
  nick_name?: string;
  nickName?: string;
  // Avatar image URL
  avatar?: string;
  avatarUrl?: string;
  avatar_url?: string;
}
|
||||
|
||||
/**
 * Raw comment object as found in `__INITIAL_STATE__`. Replies nest
 * recursively through `subComments` / `sub_comments`.
 */
interface RawCommentData {
  id?: string;
  content?: string;

  // Author: either flat ID fields or a nested user-info object.
  userId?: string;
  user_id?: string;
  userInfo?: {
    userId?: string;
    user_id?: string;
    nickname?: string;
    nick_name?: string;
    image?: string;
    avatar?: string;
  };

  // Like counter; may arrive as a number or as a display string.
  likeCount?: string | number;
  like_count?: string | number;

  // Creation timestamp.
  createTime?: number;
  create_time?: number;

  ipLocation?: string;
  ip_location?: string;

  // Replies and their count.
  subComments?: RawCommentData[];
  sub_comments?: RawCommentData[];
  subCommentCount?: number | string;
  sub_comment_count?: number | string;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// getFeedDetail
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Navigate to a Xiaohongshu note detail page and extract comprehensive
 * information including title, content, images/video, stats, and comments.
 *
 * Extraction order:
 * 1. `__INITIAL_STATE__` is parsed first.
 * 2. If that yields nothing, the rendered DOM is scraped instead.
 * 3. Comments are scraped from the DOM when the state carried none or when
 *    `loadAllComments` is set; a non-empty scrape replaces the state comments.
 *
 * A missing note container is logged and tolerated, so extraction still runs
 * on whatever the page rendered.
 *
 * @param page - A Playwright Page managed by BrowserManager.
 * @param feedId - The note (feed) ID.
 * @param xsecToken - Security token required to access the note.
 * @param loadAllComments - If true, scrolls and clicks "load more" to fetch
 *   as many comments as possible.
 * @returns A FeedDetail object with full note data and comments.
 */
export async function getFeedDetail(
  page: Page,
  feedId: string,
  xsecToken: string,
  loadAllComments = false,
): Promise<FeedDetail> {
  // Encode both dynamic parts so neither can change the URL's path or query.
  const url =
    `${FEED_DETAIL_BASE_URL}/${encodeURIComponent(feedId)}` +
    `?xsec_token=${encodeURIComponent(xsecToken)}&xsec_source=pc_feed`;

  // The URL embeds the security token, so only the feed ID is logged.
  log.debug({ feedId }, 'Navigating to feed detail page');

  await page.goto(url, { waitUntil: 'domcontentloaded' });

  // Wait for any of the note content elements to appear.
  await page
    .waitForSelector(
      `${SEL.noteContainer}, ${SEL.title}, ${SEL.description}`,
      { timeout: 15_000 },
    )
    .catch(() => {
      log.warn({ feedId }, 'Note container not found within timeout, proceeding with extraction');
    });

  // Allow render to settle.
  await page.waitForTimeout(1500);

  // -----------------------------------------------------------------------
  // Strategy 1: Extract from __INITIAL_STATE__
  // -----------------------------------------------------------------------
  const initialState = await extractInitialState(page) as RawDetailState | null;
  let detail: FeedDetail | null = null;

  if (initialState) {
    detail = parseDetailFromState(initialState, feedId, xsecToken);
    if (detail) {
      log.debug({ feedId }, 'Extracted feed detail from __INITIAL_STATE__');
    }
  }

  // -----------------------------------------------------------------------
  // Strategy 2: Fall back to DOM scraping
  // -----------------------------------------------------------------------
  if (!detail) {
    log.debug({ feedId }, 'Falling back to DOM scraping for feed detail');
    detail = await scrapeDetailFromDom(page, feedId, xsecToken);
  }

  // -----------------------------------------------------------------------
  // Load comments (from DOM — __INITIAL_STATE__ may not include them)
  // -----------------------------------------------------------------------
  if (detail.comments.length === 0 || loadAllComments) {
    const comments = await scrapeComments(page, loadAllComments);
    // Keep the state comments when the DOM scrape found nothing.
    if (comments.length > 0) {
      detail.comments = comments;
    }
  }

  log.info(
    { feedId, commentCount: detail.comments.length, imageCount: detail.images.length },
    'Feed detail extraction complete',
  );

  return detail;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// __INITIAL_STATE__ parsing
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Parse feed detail from __INITIAL_STATE__.
 *
 * The note object is searched for in several known locations of the state.
 * Fields are read in both camelCase and snake_case spellings, with empty
 * strings / zero counts as defaults.
 *
 * Timestamps below 1e12 are treated as seconds, larger ones as milliseconds.
 * A value that does not form a valid date yields `''` instead of throwing,
 * so one malformed timestamp cannot abort the whole extraction.
 *
 * @param state - The raw `__INITIAL_STATE__` object.
 * @param feedId - Feed ID, used as map key and as fallback for the note ID.
 * @param xsecToken - Token used when the note itself carries none.
 * @returns The parsed detail, or `null` when no note object was found.
 */
function parseDetailFromState(
  state: RawDetailState,
  feedId: string,
  xsecToken: string,
): FeedDetail | null {
  // Convert a raw timestamp to ISO-8601, returning '' for missing/invalid input.
  const toIsoTimestamp = (raw: unknown): string => {
    if (!raw) return '';

    let value: number | string;
    if (typeof raw === 'number') {
      value = raw;
    } else if (typeof raw === 'string') {
      // Numeric strings are handled like numbers; other strings go to Date as-is.
      const numeric = Number(raw);
      value = Number.isFinite(numeric) ? numeric : raw;
    } else {
      return '';
    }

    const date = new Date(
      typeof value === 'number' && value < 1e12 ? value * 1000 : value,
    );
    // toISOString() throws RangeError on an invalid date, so check first.
    return Number.isNaN(date.getTime()) ? '' : date.toISOString();
  };

  // Try multiple possible locations for note data, most common first:
  //   1. state.noteData.data.noteData
  //   2. state.noteData.noteData
  //   3. state.note.noteDetailMap[feedId].note
  //   4. state.note.note, then state.note.noteData
  const noteData: RawNoteData | undefined =
    state.noteData?.data?.noteData ??
    state.noteData?.noteData ??
    state.note?.noteDetailMap?.[feedId]?.note ??
    state.note?.note ??
    state.note?.noteData;

  if (!noteData) {
    return null;
  }

  const id = noteData.noteId ?? noteData.id ?? feedId;
  const title = noteData.title ?? '';
  const description = noteData.desc ?? noteData.description ?? '';
  const rawType = noteData.type ?? '';
  const type: 'normal' | 'video' =
    rawType.toLowerCase().includes('video') ? 'video' : 'normal';

  // Images: take the first populated URL field of each entry.
  const rawImages = noteData.imageList ?? noteData.image_list ?? [];
  const images = rawImages
    .map((img) => {
      const candidate =
        img.url ||
        img.urlPre ||
        img.urlDefault ||
        img.url_pre ||
        img.url_default ||
        (img.infoList ?? img.info_list)?.[0]?.url ||
        '';
      return candidate ? ensureHttps(candidate) : '';
    })
    .filter((url) => url !== '');

  // Video URL
  let videoUrl: string | undefined;
  if (noteData.video) {
    const v = noteData.video;
    videoUrl =
      v.url ??
      v.media?.stream?.h264?.[0]?.masterUrl ??
      v.media?.stream?.h264?.[0]?.master_url ??
      v.media?.video?.url ??
      undefined;
    if (videoUrl) videoUrl = ensureHttps(videoUrl);
  }

  // Tags: keep non-empty names only.
  const rawTags = noteData.tagList ?? noteData.tag_list ?? [];
  const tags = rawTags
    .map((t) => t.name ?? '')
    .filter((name) => name !== '');

  // Interaction stats
  const interact = noteData.interactInfo ?? noteData.interact_info;
  const likeCount = parseCountString(
    interact?.likedCount ?? interact?.liked_count ?? '0',
  );
  const collectCount = parseCountString(
    interact?.collectedCount ?? interact?.collected_count ?? '0',
  );
  const commentCount = parseCountString(
    interact?.commentCount ?? interact?.comment_count ?? '0',
  );
  const shareCount = parseCountString(
    interact?.shareCount ?? interact?.share_count ?? '0',
  );

  // Timestamps
  const createTime = toIsoTimestamp(
    noteData.time ?? noteData.createTime ?? noteData.create_time,
  );
  const lastUpdateTime = toIsoTimestamp(
    noteData.lastUpdateTime ?? noteData.last_update_time,
  );

  const ipLocation = noteData.ipLocation ?? noteData.ip_location ?? '';

  // User
  const rawUser = noteData.user;
  const user = {
    id: rawUser?.userId ?? rawUser?.user_id ?? '',
    nickname: rawUser?.nickname ?? rawUser?.nick_name ?? rawUser?.nickName ?? '',
    avatar: rawUser?.avatar ?? rawUser?.avatarUrl ?? rawUser?.avatar_url ?? '',
  };

  // Comments from state (may be empty); entries without an ID are dropped.
  const rawComments = noteData.comments ?? [];
  const comments = rawComments.map(parseRawComment).filter((c): c is Comment => c !== null);

  const resolvedXsecToken = noteData.xsecToken ?? noteData.xsec_token ?? xsecToken;

  return {
    id,
    xsecToken: resolvedXsecToken,
    title,
    description,
    type,
    images,
    videoUrl,
    tags,
    likeCount,
    collectCount,
    commentCount,
    shareCount,
    createTime,
    lastUpdateTime,
    ipLocation,
    user,
    comments,
  };
}
|
||||
|
||||
/**
 * Parse a raw comment from __INITIAL_STATE__ into a Comment object.
 *
 * Replies are parsed recursively. The creation time is converted to ISO-8601
 * (values below 1e12 are treated as seconds, larger ones as milliseconds); a
 * value that does not form a valid date yields `''` instead of throwing.
 *
 * @param raw - The raw comment object.
 * @returns The parsed comment, or `null` when the raw comment has no ID.
 */
function parseRawComment(raw: RawCommentData): Comment | null {
  const id = raw.id ?? '';
  if (!id) return null;

  // Convert a raw timestamp to ISO-8601, returning '' for missing/invalid input.
  const toIsoTimestamp = (value: unknown): string => {
    if (!value) return '';

    let input: number | string;
    if (typeof value === 'number') {
      input = value;
    } else if (typeof value === 'string') {
      // Numeric strings are handled like numbers; other strings go to Date as-is.
      const numeric = Number(value);
      input = Number.isFinite(numeric) ? numeric : value;
    } else {
      return '';
    }

    const date = new Date(
      typeof input === 'number' && input < 1e12 ? input * 1000 : input,
    );
    // toISOString() throws RangeError on an invalid date, so check first.
    return Number.isNaN(date.getTime()) ? '' : date.toISOString();
  };

  const userInfo = raw.userInfo;
  const userId = raw.userId ?? raw.user_id ?? userInfo?.userId ?? userInfo?.user_id ?? '';
  const nickname = userInfo?.nickname ?? userInfo?.nick_name ?? '';
  const avatar = userInfo?.image ?? userInfo?.avatar ?? '';
  const content = raw.content ?? '';

  // The like counter may be a number already or a display string.
  const likeCountRaw = raw.likeCount ?? raw.like_count ?? 0;
  const likeCount = typeof likeCountRaw === 'string'
    ? parseCountString(likeCountRaw)
    : likeCountRaw;

  const createTime = toIsoTimestamp(raw.createTime ?? raw.create_time);
  const ipLocation = raw.ipLocation ?? raw.ip_location ?? '';

  // Replies without an ID are dropped, like top-level comments.
  const rawSubs = raw.subComments ?? raw.sub_comments ?? [];
  const subComments = rawSubs.map(parseRawComment).filter((c): c is Comment => c !== null);

  return {
    id,
    userId,
    nickname,
    avatar,
    content,
    likeCount,
    createTime,
    ipLocation,
    subComments,
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// DOM scraping fallback — uses Playwright Node-side API exclusively
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Build a FeedDetail from the rendered page when `__INITIAL_STATE__` gave
 * nothing usable.
 *
 * Every field is read through Playwright's Node-side `$eval` / `$$eval` and
 * falls back to an empty value when its selector does not match. The note is
 * classified as `video` when a video URL was found. `lastUpdateTime` and
 * `comments` are always empty here.
 *
 * @param page - Page already showing the note detail view.
 * @param feedId - Used verbatim as the resulting `id`.
 * @param xsecToken - Passed through unchanged.
 * @returns The scraped detail object.
 */
async function scrapeDetailFromDom(
  page: Page,
  feedId: string,
  xsecToken: string,
): Promise<FeedDetail> {
  // Trimmed text of the first match, or '' when nothing matches.
  const readText = (selector: string): Promise<string> =>
    page.$eval(selector, (el) => el.textContent?.trim() ?? '').catch(() => '');

  // Attribute value of the first match, or '' when absent / nothing matches.
  const readAttr = (selector: string, attribute: string): Promise<string> =>
    page
      .$eval(selector, (el, name) => el.getAttribute(name) ?? '', attribute)
      .catch(() => '');

  const title = await readText(SEL.title);
  const description = await readText(SEL.description);

  // Images: the image list first, the hero image only when the list is empty.
  let images: string[] = await page
    .$$eval(SEL.images, (imgs) =>
      imgs.map((img) => img.getAttribute('src') ?? '').filter(Boolean),
    )
    .catch(() => [] as string[]);

  if (images.length === 0) {
    const heroSrc = await readAttr(SEL.heroImage, 'src');
    if (heroSrc) images = [heroSrc];
  }

  // Video: the <video> src first, then a nested <source> src.
  let videoUrl: string | undefined;
  const directVideoSrc = await readAttr(SEL.video, 'src');
  if (directVideoSrc) {
    videoUrl = directVideoSrc;
  } else {
    const nestedSourceSrc = await readAttr(SEL.videoSource, 'src');
    if (nestedSourceSrc) videoUrl = nestedSourceSrc;
  }

  const type: 'normal' | 'video' = videoUrl ? 'video' : 'normal';

  // Tags, with a leading '#' stripped.
  const tags: string[] = await page
    .$$eval(SEL.tags, (els) =>
      els
        .map((el) => el.textContent?.trim().replace(/^#/, '') ?? '')
        .filter(Boolean),
    )
    .catch(() => [] as string[]);

  // Interaction counters.
  const likeCount = await extractCount(page, SEL.likeCount);
  const collectCount = await extractCount(page, SEL.collectCount);
  const commentCount = await extractCount(page, SEL.commentCount);
  const shareCount = await extractCount(page, SEL.shareCount);

  // Displayed creation time and IP location, kept as page text.
  const createTime = await readText(SEL.createTime);
  const ipLocation = await readText(SEL.ipLocation);

  // Author: name, avatar, and the ID taken from the profile link.
  const nickname = await readText(SEL.authorName);
  const avatar = await readAttr(SEL.authorAvatar, 'src');
  const profileHref = await readAttr(SEL.authorLink, 'href');
  const authorId = profileHref.match(/\/user\/profile\/([a-f0-9]+)/)?.[1] ?? '';

  return {
    id: feedId,
    xsecToken,
    title,
    description,
    type,
    images,
    videoUrl,
    tags,
    likeCount,
    collectCount,
    commentCount,
    shareCount,
    createTime,
    lastUpdateTime: '',
    ipLocation,
    user: { id: authorId, nickname, avatar },
    comments: [],
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Comment scraping from DOM — uses Playwright Node-side API exclusively
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Collect the comments currently rendered on a note detail page.
 *
 * The comments area is scrolled into view first to trigger lazy loading.
 * With `loadAllComments`, the "show more" button is clicked until it is
 * missing, hidden, or the click limit is reached. Comment elements that fail
 * to parse are skipped.
 *
 * @param page - The current Playwright page (already on the detail URL).
 * @param loadAllComments - If true, clicks "show more" buttons repeatedly.
 * @returns An array of Comment objects.
 */
async function scrapeComments(
  page: Page,
  loadAllComments: boolean,
): Promise<Comment[]> {
  // Passed as a string expression so no DOM typings are needed on the Node side.
  await page.evaluate(`
    (() => {
      const commentsArea = document.querySelector('.comments-container');
      if (commentsArea) {
        commentsArea.scrollIntoView({ behavior: 'smooth' });
      } else {
        window.scrollTo(0, document.body.scrollHeight);
      }
    })()
  `);
  await page.waitForTimeout(1500);

  if (loadAllComments) {
    let clicks = 0;

    for (; clicks < MAX_LOAD_MORE_CLICKS; clicks++) {
      const moreButton = await page.$(SEL.showMoreComments);
      if (!moreButton) break;

      const visible = await moreButton.isVisible().catch(() => false);
      if (!visible) break;

      // A failed click is ignored; the next pass re-evaluates the button.
      await moreButton.click().catch(() => {});
      await page.waitForTimeout(LOAD_MORE_DELAY_MS);
    }

    if (clicks > 0) {
      log.debug({ clicks }, 'Clicked "show more comments" button');
    }
  }

  // Parse every rendered comment element, skipping those that fail.
  const commentElements = await page.$$(SEL.commentItem);
  const comments: Comment[] = [];

  for (const element of commentElements) {
    const parsed = await parseCommentElement(element).catch(() => null);
    if (parsed) comments.push(parsed);
  }

  return comments;
}
|
||||
|
||||
/**
 * Read the fields of one comment-like element (a top-level comment or a
 * reply) into a Comment with an empty `subComments` list.
 *
 * Text and attribute reads fall back to '' (or '0' for the like counter) when
 * a selector does not match. The ID is taken from the element's `id`,
 * `data-id`, or `data-comment-id` attribute; the user ID from the first
 * profile link inside the element.
 */
async function readCommentFields(el: ElementHandle): Promise<Comment> {
  const textOf = (selector: string, fallback: string): Promise<string> =>
    el
      .$eval(selector, (node, dflt) => node.textContent?.trim() ?? dflt, fallback)
      .catch(() => fallback);

  const content = await textOf(SEL.commentContent, '');
  const nickname = await textOf(SEL.commentAuthor, '');
  const avatar = await el
    .$eval(SEL.commentAvatar, (node) => node.getAttribute('src') ?? '')
    .catch(() => '');
  const likeText = await textOf(SEL.commentLikeCount, '0');
  const createTime = await textOf(SEL.commentTime, '');
  const ipLocation = await textOf(SEL.commentIpLocation, '');

  // Not guarded: a failure here propagates to the caller.
  const id = await el.evaluate(
    (node) =>
      node.getAttribute('id') ??
      node.getAttribute('data-id') ??
      node.getAttribute('data-comment-id') ??
      '',
  );

  const profileHref = await el
    .$eval('a[href*="/user/profile/"]', (node) => node.getAttribute('href') ?? '')
    .catch(() => '');
  const userId = profileHref.match(/\/user\/profile\/([a-f0-9]+)/)?.[1] ?? '';

  return {
    id,
    userId,
    nickname,
    avatar,
    content,
    likeCount: parseCountString(likeText),
    createTime,
    ipLocation,
    subComments: [],
  };
}

/**
 * Parse a single comment element into a Comment object using Playwright
 * Node-side API.
 *
 * Replies nested inside the element are parsed one level deep; a reply that
 * fails to parse is skipped.
 *
 * @param commentEl - Element handle of a top-level comment.
 * @returns The parsed comment including its replies.
 */
async function parseCommentElement(
  commentEl: ElementHandle,
): Promise<Comment | null> {
  const parent = await readCommentFields(commentEl);

  // Sub-comments (replies)
  const replyElements = await commentEl.$$(SEL.subCommentItem);
  const subComments: Comment[] = [];

  for (const replyEl of replyElements) {
    try {
      subComments.push(await readCommentFields(replyEl));
    } catch {
      continue;
    }
  }

  return { ...parent, subComments };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Read the trimmed text of the first element matching `selector` and turn it
 * into a number with `parseCountString`.
 *
 * If the `$eval` call rejects (for example because nothing matches), the text
 * is treated as "0".
 *
 * @param page - Page to query.
 * @param selector - Selector of the element holding the count text.
 * @returns The parsed count.
 */
async function extractCount(page: Page, selector: string): Promise<number> {
  let rawText: string;
  try {
    rawText = await page.$eval(selector, (node) => node.textContent?.trim() ?? '0');
  } catch {
    rawText = '0';
  }
  return parseCountString(rawText);
}
|
||||
@@ -0,0 +1,401 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import type { Feed } from './types.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** URL of the explore page that `listFeeds` navigates to. */
const EXPLORE_URL = 'https://www.xiaohongshu.com/explore';
|
||||
|
||||
/** Child logger created with the `module: 'xhs-feeds'` binding. */
const log = logger.child({ module: 'xhs-feeds' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// __INITIAL_STATE__ raw types (partial — only the fields we care about)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * One entry of a `feeds` array found inside `__INITIAL_STATE__`.
 *
 * Every field is optional. Both camelCase and snake_case spellings are
 * declared, and the note payload may sit directly on the item or under
 * `noteCard`.
 */
interface RawFeedItem {
  // -- Identity -------------------------------------------------------------
  id?: string;
  noteId?: string;
  note_id?: string;
  xsecToken?: string;
  xsec_token?: string;

  // -- Text -----------------------------------------------------------------
  title?: string;
  displayTitle?: string;
  display_title?: string;
  desc?: string;
  description?: string;

  // -- Kind -----------------------------------------------------------------
  type?: string;
  model_type?: string;

  // -- Nested payloads --------------------------------------------------------
  noteCard?: RawNoteCard;
  cover?: RawImage;
  user?: RawUser;

  // -- Engagement -------------------------------------------------------------
  interactInfo?: RawInteractInfo;
  interact_info?: RawInteractInfo;
  likedCount?: string;
  liked_count?: string;
}
|
||||
|
||||
/**
 * Note payload nested under `RawFeedItem.noteCard`.
 *
 * All fields are optional; camelCase and snake_case variants are both
 * declared where the item parser reads them.
 */
interface RawNoteCard {
  // -- Identity -------------------------------------------------------------
  noteId?: string;
  xsecToken?: string;
  xsec_token?: string;

  // -- Text -----------------------------------------------------------------
  title?: string;
  displayTitle?: string;
  display_title?: string;
  desc?: string;

  // -- Kind -----------------------------------------------------------------
  type?: string;

  // -- Nested payloads --------------------------------------------------------
  cover?: RawImage;
  user?: RawUser;
  interactInfo?: RawInteractInfo;
  interact_info?: RawInteractInfo;
}
|
||||
|
||||
/**
 * Cover image descriptor as it appears in the raw state.
 *
 * The URL may be carried by any of the direct fields or by an entry of the
 * variant list (`infoList` / `info_list`).
 */
interface RawImage {
  // Direct URL fields.
  url?: string;
  urlPre?: string;
  url_pre?: string;
  urlDefault?: string;
  url_default?: string;

  // Lists of image variants.
  infoList?: Array<{ url?: string }>;
  info_list?: Array<{ url?: string }>;
}
|
||||
|
||||
/**
 * Author block of a raw feed item. All fields are optional and several
 * spellings of each are declared.
 */
interface RawUser {
  // User identifier.
  userId?: string;
  user_id?: string;

  // Display name.
  nickname?: string;
  nickName?: string;
  nick_name?: string;

  // Avatar image URL.
  avatar?: string;
  avatarUrl?: string;
  avatar_url?: string;
}
|
||||
|
||||
/**
 * Engagement counters of a raw feed item. Only the like count is declared,
 * under four alternative field names; values are typed as strings.
 */
interface RawInteractInfo {
  likedCount?: string;
  liked_count?: string;

  likeCount?: string;
  like_count?: string;
}
|
||||
|
||||
/**
 * Partial typing of the `window.__INITIAL_STATE__` object read from the page.
 *
 * Three containers that may hold a `feeds` array are declared explicitly;
 * any other top-level key is allowed and typed as `unknown`.
 */
interface InitialState {
  homeFeed?: { feeds?: RawFeedItem[] };
  feed?: { feeds?: RawFeedItem[] };
  explore?: { feeds?: RawFeedItem[] };

  // Remaining keys are not modelled.
  [key: string]: unknown;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// listFeeds — extract feeds from the explore page
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Load the explore page and return its feed list.
 *
 * The feeds are taken from `__INITIAL_STATE__` when that yields at least one
 * item; otherwise the rendered cards are scraped from the DOM.
 *
 * @param page - A Playwright Page managed by BrowserManager.
 * @returns The extracted feeds (possibly empty).
 */
export async function listFeeds(page: Page): Promise<Feed[]> {
  log.debug('Navigating to explore page');
  await page.goto(EXPLORE_URL, { waitUntil: 'domcontentloaded' });

  // Fixed pause so client-side hydration can settle before the state is read.
  await page.waitForTimeout(2000);

  // Preferred source: the state object embedded in the page.
  const state = await extractInitialState(page);
  if (state !== null) {
    const fromState = parseFeedsFromState(state);
    if (fromState.length === 0) {
      log.debug('__INITIAL_STATE__ found but no feeds extracted, falling back to DOM');
    } else {
      log.info({ count: fromState.length }, 'Extracted feeds from __INITIAL_STATE__');
      return fromState;
    }
  }

  // Fallback: read the rendered feed cards.
  log.debug('Falling back to DOM scraping for feed list');
  const fromDom = await scrapeFeedsFromDom(page);
  log.info({ count: fromDom.length }, 'Extracted feeds from DOM');
  return fromDom;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// __INITIAL_STATE__ extraction
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Read `window.__INITIAL_STATE__` from the page.
 *
 * The expression is evaluated in the browser and comes back as `unknown`;
 * it is cast to `InitialState` only after confirming it is a non-null
 * object. No further shape validation is performed.
 *
 * @param page - Page to evaluate the expression in.
 * @returns The state object, or `null` when it is absent, not an object, or
 *          the evaluation fails (the failure is logged as a warning).
 */
async function extractInitialState(page: Page): Promise<InitialState | null> {
  try {
    const candidate: unknown = await page.evaluate('window.__INITIAL_STATE__');
    const isObject = typeof candidate === 'object' && candidate !== null;

    if (!isObject) {
      log.debug('__INITIAL_STATE__ is not present or not an object');
      return null;
    }

    return candidate as InitialState;
  } catch (err: unknown) {
    log.warn({ err }, 'Failed to extract __INITIAL_STATE__');
    return null;
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Feed parsing from __INITIAL_STATE__
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Parse the raw initial state into structured Feed objects.
 *
 * Candidate feed lists are examined in order: the known locations
 * (`homeFeed.feeds`, `feed.feeds`, `explore.feeds`) first, then the `feeds`
 * property of every other top-level object. The first candidate that is a
 * non-empty array AND yields at least one parseable item wins.
 *
 * The state is untyped page data that was merely cast to `InitialState`, so
 * nothing here trusts the declared types: candidates that are not arrays and
 * items that are not objects are skipped instead of throwing. An exception
 * here would abort `listFeeds` before its DOM fallback runs.
 *
 * @param state - The object read from `window.__INITIAL_STATE__`.
 * @returns Parsed feeds, or an empty array when no location yields any.
 */
function parseFeedsFromState(state: InitialState): Feed[] {
  // Known locations first; an empty or malformed one must not hide later ones.
  const candidates: unknown[] = [
    state.homeFeed?.feeds,
    state.feed?.feeds,
    state.explore?.feeds,
  ];

  // Then any other top-level container exposing a `feeds` property.
  for (const value of Object.values(state)) {
    if (value !== null && typeof value === 'object' && !Array.isArray(value)) {
      candidates.push((value as Record<string, unknown>)['feeds']);
    }
  }

  for (const candidate of candidates) {
    if (!Array.isArray(candidate) || candidate.length === 0) continue;

    const feeds = candidate
      // Drop null / primitive entries before handing them to the item parser.
      .filter((item): item is RawFeedItem => item !== null && typeof item === 'object')
      .map(parseRawFeedItem)
      .filter((feed): feed is Feed => feed !== null);

    if (feeds.length > 0) return feeds;
  }

  return [];
}
|
||||
|
||||
/**
 * Build a `Feed` from one raw state item.
 *
 * Each output field is resolved with a `??` chain over the alternative field
 * names, looking at the item itself first and then at its nested `noteCard`.
 * Missing text fields become `''` and a missing like count becomes `'0'`
 * before parsing.
 *
 * @param raw - Raw item taken from a `feeds` array.
 * @returns The feed, or `null` when no ID could be resolved.
 */
function parseRawFeedItem(raw: RawFeedItem): Feed | null {
  // Payload may be flat on the item or nested one level down.
  const nested = raw.noteCard;

  const feedId = raw.id ?? raw.noteId ?? raw.note_id ?? nested?.noteId ?? '';
  const token =
    raw.xsecToken ?? raw.xsec_token ?? nested?.xsecToken ?? nested?.xsec_token ?? '';
  const heading =
    raw.displayTitle ??
    raw.display_title ??
    raw.title ??
    nested?.displayTitle ??
    nested?.display_title ??
    nested?.title ??
    '';
  const body = raw.desc ?? raw.description ?? nested?.desc ?? '';

  // Anything whose type string mentions "video" is a video; the rest is 'normal'.
  const kind = (raw.type ?? raw.model_type ?? nested?.type ?? '').toLowerCase();
  const isVideo = kind.includes('video');

  const cover = extractImageUrl(raw.cover ?? nested?.cover);

  const author = raw.user ?? nested?.user;
  const authorId = author?.userId ?? author?.user_id ?? '';
  const authorName = author?.nickname ?? author?.nick_name ?? author?.nickName ?? '';
  const authorAvatar = author?.avatar ?? author?.avatarUrl ?? author?.avatar_url ?? '';

  // Like count: interaction info first, then the item's own fields.
  const interactions =
    raw.interactInfo ?? raw.interact_info ?? nested?.interactInfo ?? nested?.interact_info;
  const likes = parseCountString(
    interactions?.likedCount ??
      interactions?.liked_count ??
      interactions?.likeCount ??
      interactions?.like_count ??
      raw.likedCount ??
      raw.liked_count ??
      '0',
  );

  // An item without an ID is not usable.
  if (!feedId) {
    return null;
  }

  const feed: Feed = {
    id: feedId,
    xsecToken: token,
    title: heading,
    description: body,
    type: isVideo ? 'video' : 'normal',
    coverUrl: cover,
    likeCount: likes,
    user: {
      id: authorId,
      nickname: authorName,
      avatar: authorAvatar,
    },
  };
  return feed;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// DOM scraping fallback — uses Playwright Node-side API exclusively
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Build the feed list from the rendered `.note-item` cards.
 *
 * Every per-card read is best effort: a failed lookup yields an empty string
 * (or "0" for the like count). Cards whose cover link does not contain an
 * `/explore/<hex id>` path are skipped. `description` is always `''` here.
 *
 * @param page - Page that has already navigated to the feed listing.
 * @returns One Feed per card that exposes an ID.
 */
async function scrapeFeedsFromDom(page: Page): Promise<Feed[]> {
  // Best-effort wait; on timeout the query below simply finds no cards.
  await page.waitForSelector('.note-item', { timeout: 10_000 }).catch(() => null);

  const cards = await page.$$('.note-item');
  const collected: Feed[] = [];

  for (const card of cards) {
    // Small readers bound to the current card; each swallows lookup failures.
    const textOf = (selector: string): Promise<string> =>
      card.$eval(selector, (el) => el.textContent?.trim() ?? '').catch(() => '');
    const hrefOf = (selector: string): Promise<string> =>
      card.$eval(selector, (el) => el.getAttribute('href') ?? '').catch(() => '');
    const srcOf = (selector: string): Promise<string> =>
      card.$eval(selector, (el) => el.getAttribute('src') ?? '').catch(() => '');

    try {
      // The cover link carries both the note ID and the xsec_token.
      const noteHref = await hrefOf('a.cover');
      const id = noteHref.match(/\/explore\/([a-f0-9]+)/)?.[1] ?? '';
      const xsecToken = noteHref.match(/xsec_token=([^&]+)/)?.[1] ?? '';

      if (!id) continue;

      const coverUrl = await srcOf('a.cover img');
      const title = await textOf('.footer .title');
      const nickname = await textOf('.footer .author-wrapper .name');
      const avatar = await srcOf('.footer .author-wrapper .author-head img');

      // Author ID is taken from the profile link.
      const profileHref = await hrefOf('.footer .author-wrapper a');
      const userId = profileHref.match(/\/user\/profile\/([a-f0-9]+)/)?.[1] ?? '';

      const likeText = await card
        .$eval('.footer .like-wrapper .count', (el) => el.textContent?.trim() ?? '0')
        .catch(() => '0');
      const likeCount = parseCountString(likeText);

      // A play icon on the card marks a video note.
      const isVideo = await card
        .$('.play-icon')
        .then((icon) => icon !== null)
        .catch(() => false);

      collected.push({
        id,
        xsecToken,
        title,
        description: '',
        type: isVideo ? 'video' : 'normal',
        coverUrl,
        likeCount,
        user: { id: userId, nickname, avatar },
      });
    } catch {
      // Skip cards that fail to parse.
      continue;
    }
  }

  return collected;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Pick a usable URL out of a raw cover image object.
 *
 * The direct fields are tried in the order `url`, `urlPre`, `urlDefault`,
 * `url_pre`, `url_default`; the first non-empty one wins. Failing that, the
 * URL of the first entry of `infoList` / `info_list` is used. The chosen
 * value is passed through `ensureHttps`.
 *
 * @param raw - Raw image object, or `undefined` when the item has no cover.
 * @returns The URL, or `''` when none is available.
 */
function extractImageUrl(raw: RawImage | undefined): string {
  if (!raw) return '';

  const directCandidates = [raw.url, raw.urlPre, raw.urlDefault, raw.url_pre, raw.url_default];
  for (const candidate of directCandidates) {
    if (candidate) return ensureHttps(candidate);
  }

  // Only the first variant in the list is considered.
  const firstVariantUrl = (raw.infoList ?? raw.info_list)?.[0]?.url;
  return firstVariantUrl ? ensureHttps(firstVariantUrl) : '';
}
|
||||
|
||||
/**
 * Normalise a URL to the https scheme.
 *
 * - Protocol-relative input (`//host/...`) gets `https:` prepended.
 * - Input starting with `http://` has that prefix swapped for `https://`.
 * - Anything else is returned unchanged.
 *
 * @param url - URL string to normalise.
 * @returns The normalised URL.
 */
function ensureHttps(url: string): string {
  const insecurePrefix = 'http://';

  if (url.startsWith('//')) {
    return 'https:' + url;
  }
  return url.startsWith(insecurePrefix)
    ? 'https://' + url.slice(insecurePrefix.length)
    : url;
}
|
||||
|
||||
/**
 * Parse a human-readable count into a number.
 *
 * Thousands separators (`,`) are removed and the leading numeric part is
 * scaled by the unit found in the text:
 *
 * - `亿`       → × 100,000,000
 * - `w` / `万` → × 10,000
 * - `k` / `千` → × 1,000
 *
 * Text without a leading number (e.g. a bare label) yields `0`. It never
 * yields `NaN`, even when the label happens to contain a unit letter.
 *
 * @param str - Count text. Callers pass values read from untyped page state,
 *              so a number arriving at runtime is tolerated as well.
 * @returns The parsed count, or `0` when the text carries no number.
 */
function parseCountString(str: string): number {
  // Coerce first: the declared type is not enforced on data read from the page.
  const cleaned = String(str ?? '').replace(/,/g, '').trim().toLowerCase();
  if (!cleaned) return 0;

  // No leading number means there is nothing to scale ("like", "w", "赞").
  const leading = parseFloat(cleaned);
  if (!Number.isFinite(leading)) return 0;

  if (cleaned.includes('亿')) {
    return Math.round(leading * 100_000_000);
  }
  if (cleaned.includes('w') || cleaned.includes('万')) {
    return Math.round(leading * 10_000);
  }
  if (cleaned.includes('k') || cleaned.includes('千')) {
    return Math.round(leading * 1_000);
  }

  // Plain counts keep integer parsing, so "12+" → 12.
  const n = parseInt(cleaned, 10);
  return Number.isNaN(n) ? 0 : n;
}
|
||||
|
||||
// Re-export for use by other modules (search, user-profile, feed-detail)
|
||||
// that need the same extraction / parsing helpers.
|
||||
export { extractInitialState, parseCountString, ensureHttps };
|
||||
@@ -0,0 +1,537 @@
|
||||
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
||||
import type { Router } from 'express';
|
||||
|
||||
import type { BrowserManager } from '../../browser/manager.js';
|
||||
import { config } from '../../config/index.js';
|
||||
import { withErrorHandling } from '../../utils/errors.js';
|
||||
import { validateMediaPath } from '../../utils/downloader.js';
|
||||
import { checkLoginStatus, getLoginQRCode, deleteCookies } from './login.js';
|
||||
import { listFeeds } from './feeds.js';
|
||||
import { searchFeeds } from './search.js';
|
||||
import { getFeedDetail } from './feed-detail.js';
|
||||
import { getUserProfile } from './user-profile.js';
|
||||
import { publishImageNote } from './publish.js';
|
||||
import { publishVideoNote } from './publish-video.js';
|
||||
import { postComment, replyComment } from './comment.js';
|
||||
import { toggleLike, toggleFavorite } from './interaction.js';
|
||||
import { createXhsRoutes } from './routes.js';
|
||||
import {
|
||||
CheckLoginSchema,
|
||||
GetLoginQRCodeSchema,
|
||||
DeleteCookiesSchema,
|
||||
ListFeedsSchema,
|
||||
SearchSchema,
|
||||
GetFeedDetailSchema,
|
||||
GetUserProfileSchema,
|
||||
PublishImageSchema,
|
||||
PublishVideoSchema,
|
||||
PostCommentSchema,
|
||||
ReplyCommentSchema,
|
||||
LikeSchema,
|
||||
FavoriteSchema,
|
||||
} from './schemas.js';
|
||||
import type { SearchFilters } from './types.js';
|
||||
import type { PlatformPlugin } from '../../server/app.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Platform key: used as the plugin name and passed to `browser.withPage`. */
const PLATFORM = 'xiaohongshu';
|
||||
|
||||
/** Maximum file size for video uploads (500 MB). */
|
||||
const VIDEO_MAX_SIZE_MB = 500;
|
||||
|
||||
/** Maximum file size for image uploads (20 MB — default in validateMediaPath). */
|
||||
const IMAGE_MAX_SIZE_MB = 20;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// PlatformPlugin implementation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Wrap a payload as a tool result holding a single JSON-encoded text item. */
function toTextResult(payload: unknown) {
  return {
    content: [{ type: 'text' as const, text: JSON.stringify(payload) }],
  };
}

/**
 * Resolve an operation timeout: the operation-specific value when set,
 * otherwise the configured `default`, otherwise `fallbackMs`.
 */
function resolveTimeout(operationMs: number | undefined, fallbackMs: number): number {
  return operationMs ?? config.operationTimeouts['default'] ?? fallbackMs;
}

/**
 * Xiaohongshu platform plugin.
 *
 * `registerRoutes` mounts the REST router and `registerTools` registers the
 * 13 MCP tools. Every tool handler runs inside `withErrorHandling` and
 * returns its result as one JSON text item. Page-based tools run through
 * `browser.withPage` with a per-operation timeout.
 */
export const xiaohongshuPlugin: PlatformPlugin = {
  name: PLATFORM,

  /** Mount the Xiaohongshu REST routes at the root of the given router. */
  registerRoutes(router: Router, browser: BrowserManager): void {
    router.use('/', createXhsRoutes(browser));
  },

  /** Register all Xiaohongshu MCP tools on the server. */
  registerTools(server: McpServer, browser: BrowserManager): void {
    // ---------------------------------------------------------------------
    // Login management (3 tools)
    // ---------------------------------------------------------------------

    server.tool(
      'xhs_check_login',
      'Check Xiaohongshu login status',
      CheckLoginSchema,
      async () =>
        withErrorHandling('xhs_check_login', async () => {
          const timeoutMs = resolveTimeout(config.operationTimeouts['login'], 60_000);
          const status = await browser.withPage(
            PLATFORM,
            async (page) => checkLoginStatus(page),
            timeoutMs,
          );
          return toTextResult(status);
        }),
    );

    server.tool(
      'xhs_get_login_qrcode',
      'Get Xiaohongshu login QR code (user scans with phone)',
      GetLoginQRCodeSchema,
      async () =>
        withErrorHandling('xhs_get_login_qrcode', async () => {
          // This helper receives the BrowserManager itself rather than a page.
          const result = await getLoginQRCode(browser);
          return toTextResult(result);
        }),
    );

    server.tool(
      'xhs_delete_cookies',
      'Delete Xiaohongshu cookies and reset login session',
      DeleteCookiesSchema,
      async () =>
        withErrorHandling('xhs_delete_cookies', async () => {
          await deleteCookies(browser);
          return toTextResult({ success: true, message: 'Cookies deleted' });
        }),
    );

    // ---------------------------------------------------------------------
    // Content browsing (4 tools)
    // ---------------------------------------------------------------------

    server.tool(
      'xhs_list_feeds',
      'Get Xiaohongshu explore page recommended feed list',
      ListFeedsSchema,
      async () =>
        withErrorHandling('xhs_list_feeds', async () => {
          const timeoutMs = resolveTimeout(config.operationTimeouts['feed_list'], 60_000);
          const feeds = await browser.withPage(
            PLATFORM,
            async (page) => listFeeds(page),
            timeoutMs,
          );
          return toTextResult(feeds);
        }),
    );

    server.tool(
      'xhs_search',
      'Search Xiaohongshu notes by keyword with optional filters (sort, type, time range)',
      SearchSchema,
      async (args) =>
        withErrorHandling('xhs_search', async () => {
          const timeoutMs = resolveTimeout(config.operationTimeouts['search'], 60_000);

          // Copy only the three known filter fields; no filters → undefined.
          let filters: SearchFilters | undefined;
          if (args.filters) {
            const { sort, type, time } = args.filters;
            filters = { sort, type, time };
          }

          const feeds = await browser.withPage(
            PLATFORM,
            async (page) => searchFeeds(page, args.keyword, filters),
            timeoutMs,
          );
          return toTextResult(feeds);
        }),
    );

    server.tool(
      'xhs_get_feed_detail',
      'Get Xiaohongshu note detail including content, images, stats, and comments',
      GetFeedDetailSchema,
      async (args) =>
        withErrorHandling('xhs_get_feed_detail', async () => {
          const timeoutMs = resolveTimeout(config.operationTimeouts['feed_detail'], 60_000);
          const detail = await browser.withPage(
            PLATFORM,
            async (page) =>
              getFeedDetail(page, args.feed_id, args.xsec_token, args.load_all_comments),
            timeoutMs,
          );
          return toTextResult(detail);
        }),
    );

    server.tool(
      'xhs_get_user_profile',
      'Get Xiaohongshu user profile information including bio, stats, and recent notes',
      GetUserProfileSchema,
      async (args) =>
        withErrorHandling('xhs_get_user_profile', async () => {
          const timeoutMs = resolveTimeout(config.operationTimeouts['user_profile'], 60_000);
          const profile = await browser.withPage(
            PLATFORM,
            async (page) => getUserProfile(page, args.user_id, args.xsec_token),
            timeoutMs,
          );
          return toTextResult(profile);
        }),
    );

    // ---------------------------------------------------------------------
    // Content publishing (2 tools)
    // ---------------------------------------------------------------------

    server.tool(
      'xhs_publish_image',
      'Publish an image note on Xiaohongshu. Provide local file paths for images.',
      PublishImageSchema,
      async (args) =>
        withErrorHandling('xhs_publish_image', async () => {
          // Fail fast: every image path is validated, one at a time and in
          // order, before a browser page is requested.
          const validatedPaths: string[] = [];
          for (const candidate of args.images) {
            validatedPaths.push(
              await validateMediaPath(candidate, { maxSizeMB: IMAGE_MAX_SIZE_MB }),
            );
          }

          const timeoutMs = resolveTimeout(config.operationTimeouts['publish'], 300_000);
          const result = await browser.withPage(
            PLATFORM,
            async (page) =>
              publishImageNote(page, args.title, args.content, validatedPaths, {
                tags: args.tags,
                scheduleAt: args.schedule_at,
                isOriginal: args.is_original,
                visibility: args.visibility,
              }),
            timeoutMs,
          );
          return toTextResult(result);
        }),
    );

    server.tool(
      'xhs_publish_video',
      'Publish a video note on Xiaohongshu. Provide a local file path for the video.',
      PublishVideoSchema,
      async (args) =>
        withErrorHandling('xhs_publish_video', async () => {
          // Fail fast: the video path is validated before a page is requested.
          const validatedPath = await validateMediaPath(args.video, {
            maxSizeMB: VIDEO_MAX_SIZE_MB,
          });

          const timeoutMs = resolveTimeout(config.operationTimeouts['publish'], 300_000);
          const result = await browser.withPage(
            PLATFORM,
            async (page) =>
              publishVideoNote(page, args.title, args.content, validatedPath, {
                tags: args.tags,
                scheduleAt: args.schedule_at,
                visibility: args.visibility,
              }),
            timeoutMs,
          );
          return toTextResult(result);
        }),
    );

    // ---------------------------------------------------------------------
    // Interactions (4 tools)
    // ---------------------------------------------------------------------

    server.tool(
      'xhs_post_comment',
      'Post a comment on a Xiaohongshu note',
      PostCommentSchema,
      async (args) =>
        withErrorHandling('xhs_post_comment', async () => {
          const timeoutMs = resolveTimeout(config.operationTimeouts['comment'], 20_000);
          const result = await browser.withPage(
            PLATFORM,
            async (page) => postComment(page, args.feed_id, args.xsec_token, args.content),
            timeoutMs,
          );
          return toTextResult(result);
        }),
    );

    server.tool(
      'xhs_reply_comment',
      'Reply to a comment on a Xiaohongshu note',
      ReplyCommentSchema,
      async (args) =>
        withErrorHandling('xhs_reply_comment', async () => {
          const timeoutMs = resolveTimeout(config.operationTimeouts['reply'], 20_000);
          const result = await browser.withPage(
            PLATFORM,
            async (page) =>
              replyComment(
                page,
                args.feed_id,
                args.xsec_token,
                args.content,
                args.comment_id,
                args.user_id,
              ),
            timeoutMs,
          );
          return toTextResult(result);
        }),
    );

    server.tool(
      'xhs_like',
      'Like or unlike a Xiaohongshu note',
      LikeSchema,
      async (args) =>
        withErrorHandling('xhs_like', async () => {
          const timeoutMs = resolveTimeout(config.operationTimeouts['like'], 15_000);
          const result = await browser.withPage(
            PLATFORM,
            async (page) => toggleLike(page, args.feed_id, args.xsec_token, args.unlike),
            timeoutMs,
          );
          return toTextResult(result);
        }),
    );

    server.tool(
      'xhs_favorite',
      'Favorite or unfavorite a Xiaohongshu note',
      FavoriteSchema,
      async (args) =>
        withErrorHandling('xhs_favorite', async () => {
          const timeoutMs = resolveTimeout(config.operationTimeouts['favorite'], 15_000);
          const result = await browser.withPage(
            PLATFORM,
            async (page) =>
              toggleFavorite(page, args.feed_id, args.xsec_token, args.unfavorite),
            timeoutMs,
          );
          return toTextResult(result);
        }),
    );
  },
};
|
||||
@@ -0,0 +1,214 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import { XHS_SELECTORS } from './selectors.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Base URL of note detail pages; the feed ID is appended as a path segment. */
const FEED_DETAIL_URL = 'https://www.xiaohongshu.com/explore';

/** Pause (ms) after clicking like/favorite so the page can reflect the new state. */
const TOGGLE_SETTLE_MS = 1000;

// Selector groups used by this module: the interaction bar and the note detail view.
const { interaction: selInteraction, feedDetail: selDetail } = XHS_SELECTORS;

/** Child logger tagged with this module's name. */
const log = logger.child({ module: 'xhs-interaction' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// toggleLike
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Bring a Xiaohongshu note into the requested like state.
 *
 * The note's detail page is opened, the current state is read from the DOM,
 * and the like button is clicked only when that state differs from the
 * requested one. After a click the state is read again to verify the result.
 *
 * @param page      - Playwright Page managed by BrowserManager.
 * @param feedId    - The note / feed ID.
 * @param xsecToken - Security token for accessing the feed page.
 * @param unlike    - When true the requested state is "not liked". Default: false.
 * @returns `success` is true when the note is in the requested state; `liked`
 *          is the state observed after the click, the requested state when no
 *          click was needed, or the initial state when the button is missing.
 */
export async function toggleLike(
  page: Page,
  feedId: string,
  xsecToken: string,
  unlike?: boolean,
): Promise<{ success: boolean; liked: boolean }> {
  const wantLiked = !unlike;

  log.info({ feedId, unlike: unlike ?? false }, 'Toggling like on note');

  // Step 1: open the note detail page and give it a moment to render.
  await page.goto(buildFeedUrl(feedId, xsecToken), {
    waitUntil: 'domcontentloaded',
  });
  await page.waitForSelector(selDetail.noteContainer, { timeout: 10_000 });
  await page.waitForTimeout(1_000);

  // Step 2: read the current like state.
  const likedBefore = await isElementActive(page, selInteraction.likeButtonActive);
  log.debug(
    { isCurrentlyLiked: likedBefore, desiredUnlike: unlike ?? false },
    'Current like state',
  );

  // Nothing to do when the note is already in the requested state.
  if (likedBefore === wantLiked) {
    log.info(
      { feedId, liked: wantLiked, alreadyInState: true },
      'Like already in desired state',
    );
    return { success: true, liked: wantLiked };
  }

  // Step 3: click the like button.
  const button = await page.$(selInteraction.likeButton);
  if (button === null) {
    log.warn('Like button not found on feed detail page');
    return { success: false, liked: likedBefore };
  }

  await button.click();
  await page.waitForTimeout(TOGGLE_SETTLE_MS);

  // Step 4: verify that the click had the intended effect.
  const likedAfter = await isElementActive(page, selInteraction.likeButtonActive);
  const success = likedAfter === wantLiked;

  log.info({ feedId, liked: likedAfter, success }, 'Like toggle complete');

  return { success, liked: likedAfter };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// toggleFavorite
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Bring a Xiaohongshu note into the requested favorite state.
 *
 * The note's detail page is opened, the current state is read from the DOM,
 * and the favorite button is clicked only when that state differs from the
 * requested one. After a click the state is read again to verify the result.
 *
 * @param page       - Playwright Page managed by BrowserManager.
 * @param feedId     - The note / feed ID.
 * @param xsecToken  - Security token for accessing the feed page.
 * @param unfavorite - When true the requested state is "not favorited". Default: false.
 * @returns `success` is true when the note is in the requested state;
 *          `favorited` is the state observed after the click, the requested
 *          state when no click was needed, or the initial state when the
 *          button is missing.
 */
export async function toggleFavorite(
  page: Page,
  feedId: string,
  xsecToken: string,
  unfavorite?: boolean,
): Promise<{ success: boolean; favorited: boolean }> {
  const wantFavorited = !unfavorite;

  log.info(
    { feedId, unfavorite: unfavorite ?? false },
    'Toggling favorite on note',
  );

  // Step 1: open the note detail page and give it a moment to render.
  await page.goto(buildFeedUrl(feedId, xsecToken), {
    waitUntil: 'domcontentloaded',
  });
  await page.waitForSelector(selDetail.noteContainer, { timeout: 10_000 });
  await page.waitForTimeout(1_000);

  // Step 2: read the current favorite state.
  const favoritedBefore = await isElementActive(
    page,
    selInteraction.favoriteButtonActive,
  );
  log.debug(
    {
      isCurrentlyFavorited: favoritedBefore,
      desiredUnfavorite: unfavorite ?? false,
    },
    'Current favorite state',
  );

  // Nothing to do when the note is already in the requested state.
  if (favoritedBefore === wantFavorited) {
    log.info(
      { feedId, favorited: wantFavorited, alreadyInState: true },
      'Favorite already in desired state',
    );
    return { success: true, favorited: wantFavorited };
  }

  // Step 3: click the favorite button.
  const button = await page.$(selInteraction.favoriteButton);
  if (button === null) {
    log.warn('Favorite button not found on feed detail page');
    return { success: false, favorited: favoritedBefore };
  }

  await button.click();
  await page.waitForTimeout(TOGGLE_SETTLE_MS);

  // Step 4: verify that the click had the intended effect.
  const favoritedAfter = await isElementActive(
    page,
    selInteraction.favoriteButtonActive,
  );
  const success = favoritedAfter === wantFavorited;

  log.info(
    { feedId, favorited: favoritedAfter, success },
    'Favorite toggle complete',
  );

  return { success, favorited: favoritedAfter };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Build the URL for a feed detail page.
 *
 * Both caller-supplied values are percent-encoded before being interpolated:
 * the feed ID as a single path segment and the token as a query value. This
 * keeps characters such as `/`, `?` or `#` in either value from altering the
 * path or query of the resulting URL.
 *
 * @param feedId    - The note / feed ID, used as the last path segment.
 * @param xsecToken - Security token, sent as the `xsec_token` query parameter.
 * @returns The absolute detail-page URL.
 */
function buildFeedUrl(feedId: string, xsecToken: string): string {
  const noteSegment = encodeURIComponent(feedId);
  const token = encodeURIComponent(xsecToken);
  return `${FEED_DETAIL_URL}/${noteSegment}?xsec_token=${token}&xsec_source=pc_search`;
}
|
||||
|
||||
/**
 * Report whether at least one element on the page matches `selector`.
 *
 * The like/favorite helpers pass a selector that targets the button's
 * toggled-on appearance, so a match is treated as "currently active".
 * Only presence in the DOM is tested, not visibility.
 */
async function isElementActive(page: Page, selector: string): Promise<boolean> {
  return (await page.$(selector)) !== null;
}
|
||||
@@ -0,0 +1,186 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import type { BrowserManager } from '../../browser/manager.js';
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import { cookieStore } from '../../cookie/store.js';
|
||||
import { XHS_SELECTORS } from './selectors.js';
|
||||
import type { LoginStatus, QRCodeResult } from './types.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Platform key passed to BrowserManager and the cookie store. */
const PLATFORM = 'xiaohongshu';

/** Page on which both the login indicator and the login modal are looked up. */
const EXPLORE_URL = 'https://www.xiaohongshu.com/explore';

/** How long to wait for the user to scan the QR code (4 minutes). */
const QR_SCAN_TIMEOUT_MS = 240_000;

/** Child logger tagged with this module's name. */
const log = logger.child({ module: 'xhs-login' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// checkLoginStatus
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Navigate to the explore page and determine whether the user is logged in
 * by checking for the presence of the logged-in indicator element.
 *
 * @param page - A Playwright Page already managed by the caller.
 * @returns `{ loggedIn: false }` when the indicator does not appear within
 *          5 seconds; otherwise `{ loggedIn: true }`, plus `username` when the
 *          indicator carries non-blank text.
 */
export async function checkLoginStatus(page: Page): Promise<LoginStatus> {
  await page.goto(EXPLORE_URL, { waitUntil: 'domcontentloaded' });

  // The indicator is rendered client-side, so wait briefly for it. A timeout
  // (or any other wait failure) is interpreted as "not logged in".
  const indicator = await page
    .waitForSelector(XHS_SELECTORS.login.loggedInIndicator, { timeout: 5_000 })
    .catch(() => null);

  if (!indicator) {
    return { loggedIn: false };
  }

  // Best effort: use the indicator's text as the username. Trim BEFORE the
  // emptiness check so whitespace-only text is not reported as `username: ''`.
  const rawText = await indicator.textContent().catch(() => null);
  const username = rawText?.trim();

  return username ? { loggedIn: true, username } : { loggedIn: true };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// getLoginQRCode
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Open the explore page, bring up the login modal when necessary, and return
 * the `src` of the QR code image.
 *
 * Scanning the code takes an indeterminate amount of time, so the page is
 * obtained with `acquirePage` rather than `withPage`. Once the QR data has
 * been read, a background task takes ownership of the page: it waits for the
 * scan, saves cookies and releases the page. On every path that does not hand
 * the page to that task, the page is released here.
 *
 * @param browser - The shared BrowserManager instance.
 * @returns QR code data, or an indication that the user is already logged in.
 * @throws When the QR image never appears or its `src` attribute is empty.
 */
export async function getLoginQRCode(
  browser: BrowserManager,
): Promise<QRCodeResult> {
  const { page, release } = await browser.acquirePage(PLATFORM);

  // Resolves to whether `selector` shows up within `timeout` ms; never rejects.
  const appearsWithin = (selector: string, timeout: number): Promise<boolean> =>
    page
      .waitForSelector(selector, { timeout })
      .then(() => true)
      .catch(() => false);

  try {
    await page.goto(EXPLORE_URL, { waitUntil: 'domcontentloaded' });

    // Already logged in: no QR code is needed.
    if (await appearsWithin(XHS_SELECTORS.login.loggedInIndicator, 3_000)) {
      await release();
      return { qrcodeData: '', alreadyLoggedIn: true, timeout: '0' };
    }

    // When the QR code is not showing yet, try opening the login modal.
    if (!(await appearsWithin(XHS_SELECTORS.login.qrCodeImage, 3_000))) {
      const loginBtn = await page
        .waitForSelector(XHS_SELECTORS.login.loginButton, { timeout: 5_000 })
        .catch(() => null);
      await loginBtn?.click();
    }

    // The QR image must be present by now; a timeout here propagates.
    const qrImage = await page.waitForSelector(XHS_SELECTORS.login.qrCodeImage, {
      timeout: 10_000,
    });
    const qrcodeData = await qrImage.getAttribute('src');

    if (!qrcodeData) {
      await release();
      throw new Error('QR code image src attribute is empty');
    }

    // Fire-and-forget hand-off: the background task releases the page when
    // the scan succeeds, times out or fails. The `.catch()` keeps any
    // rejection from becoming an unhandled one.
    waitForLoginAndRelease(page, browser, release).catch((err: unknown) => {
      log.error({ err }, 'Login wait flow encountered an unexpected error');
    });

    return { qrcodeData, alreadyLoggedIn: false, timeout: '4m' };
  } catch (err) {
    // Failure before the hand-off: make sure the page does not stay acquired.
    await release();
    throw err;
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// deleteCookies
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Remove the persisted Xiaohongshu cookies from the cookie store.
 *
 * Only the stored cookies are deleted; the BrowserManager is not touched.
 *
 * @param _browser - The shared BrowserManager instance. Currently unused;
 *                   accepted so the signature matches the other login helpers.
 */
export async function deleteCookies(_browser: BrowserManager): Promise<void> {
  await cookieStore.delete(PLATFORM);

  log.info('Xiaohongshu cookies deleted');
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal: waitForLoginAndRelease
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Background task that waits for the logged-in indicator to appear (meaning
 * the user has scanned the QR code) and then persists cookies.
 *
 * The two failure modes are reported separately:
 * - the wait ends without a login (timeout, or the page was closed): logged
 *   at debug level, since the user simply did not scan in time;
 * - the login was detected but saving cookies failed: logged at error level,
 *   because the session would otherwise be lost silently.
 *
 * The page is released in all cases via `finally`. `release` is described by
 * its provider as idempotent, so a concurrent release elsewhere is harmless.
 *
 * @param page    - The acquired page currently showing the QR code.
 * @param browser - BrowserManager used to persist cookies after login.
 * @param release - Callback that returns the page to the BrowserManager.
 */
async function waitForLoginAndRelease(
  page: Page,
  browser: BrowserManager,
  release: () => Promise<void>,
): Promise<void> {
  try {
    try {
      await page.waitForSelector(XHS_SELECTORS.login.loggedInIndicator, {
        timeout: QR_SCAN_TIMEOUT_MS,
      });
    } catch (err: unknown) {
      // Timeout or page closed: not an error, the user did not scan in time.
      log.debug({ err }, 'Login wait ended without successful scan');
      return;
    }

    log.info('QR code scanned — login detected, saving cookies');

    try {
      await browser.saveCookies(PLATFORM);
    } catch (err: unknown) {
      // Must not be mistaken for "no scan": the login worked, persistence did not.
      log.error({ err }, 'Login detected but saving cookies failed');
    }
  } finally {
    await release();
  }
}
|
||||
@@ -0,0 +1,313 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import { XHS_SELECTORS } from './selectors.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Creator-center page that hosts the publish form. */
const CREATOR_PUBLISH_URL = 'https://creator.xiaohongshu.com/publish/publish';

/**
 * Upper bound for a single video upload (4 minutes).
 * Videos can be up to 500 MB, hence the generous limit.
 */
const VIDEO_UPLOAD_TIMEOUT_MS = 4 * 60_000;

/** Pause (ms) after the upload finishes so the UI can settle. */
const UPLOAD_SETTLE_MS = 2000;

/** Pause (ms) after filling a form field. */
const FIELD_SETTLE_MS = 500;

/** Pause (ms) after clicking publish before the result is checked. */
const PUBLISH_SETTLE_MS = 3000;

// Selectors of the publish form.
const { publish: sel } = XHS_SELECTORS;

/** Child logger tagged with this module's name. */
const log = logger.child({ module: 'xhs-publish-video' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// publishVideoNote
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Optional settings accepted by `publishVideoNote`. */
export interface PublishVideoOptions {
  /**
   * Visibility of the note. `'private'` and `'friends'` select the matching
   * option; `'public'` (or omitting the field) leaves the form untouched.
   */
  visibility?: string;

  /** Publish time, typed verbatim into the schedule input of the form. */
  scheduleAt?: string;

  /** Topic tags; each one is typed into the content editor as `#tag`. */
  tags?: string[];
}
|
||||
|
||||
/**
 * Publish a video note through the Xiaohongshu creator-center form.
 *
 * The steps run strictly in order: open the publish page, upload the video,
 * type the title and the body, apply the optional tags / visibility /
 * schedule, click publish, and wait for a success signal.
 *
 * @param page      - Playwright Page managed by BrowserManager.
 * @param title     - Note title (required, non-empty).
 * @param content   - Note body / description text.
 * @param videoPath - Local path to the video file (already validated by caller).
 * @param options   - Optional tags, schedule, and visibility.
 * @returns Object indicating success and an optional noteId if detectable.
 */
export async function publishVideoNote(
  page: Page,
  title: string,
  content: string,
  videoPath: string,
  options?: PublishVideoOptions,
): Promise<{ success: boolean; noteId?: string }> {
  const { tags, visibility, scheduleAt } = options ?? {};

  log.info({ hasOptions: Boolean(options) }, 'Starting video note publish');

  // 1. Open the creator publish page and let the SPA hydrate.
  await page.goto(CREATOR_PUBLISH_URL, { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(2_000);

  // 2. Upload the video. The form exposes one file input for both images and
  //    video, so the image-input selector is used here as well.
  const uploadInput = await page.waitForSelector(sel.imageFileInput, {
    timeout: 10_000,
  });
  await uploadInput.setInputFiles(videoPath);
  log.debug('Video file set on input element, waiting for upload to complete');

  // The uploaded-video element marks the end of the upload, which can take
  // far longer than an image upload.
  await page.waitForSelector(sel.uploadedVideoItem, {
    timeout: VIDEO_UPLOAD_TIMEOUT_MS,
  });
  await page.waitForTimeout(UPLOAD_SETTLE_MS);
  log.debug('Video uploaded successfully');

  // 3. Title: focus, clear, then type key by key.
  const titleField = await page.waitForSelector(sel.titleInput, {
    timeout: 5_000,
  });
  await titleField.click();
  await titleField.fill('');
  await page.keyboard.type(title, { delay: 30 });
  await page.waitForTimeout(FIELD_SETTLE_MS);

  // 4. Body / description.
  const bodyEditor = await page.waitForSelector(sel.contentEditor, {
    timeout: 5_000,
  });
  await bodyEditor.click();
  await page.keyboard.type(content, { delay: 20 });
  await page.waitForTimeout(FIELD_SETTLE_MS);

  // 5. Tags (optional).
  if (tags?.length) {
    await addTags(page, tags);
  }

  // 6. Visibility (optional). 'public' is skipped and the form is left as is.
  if (visibility && visibility !== 'public') {
    await setVisibility(page, visibility);
  }

  // 7. Schedule (optional).
  if (scheduleAt) {
    await setSchedule(page, scheduleAt);
  }

  // 8. Submit.
  const submitButton = await page.waitForSelector(sel.publishButton, {
    timeout: 5_000,
  });
  await submitButton.click();
  log.debug('Publish button clicked, waiting for success');

  // 9. Wait for a success signal.
  const result = await waitForPublishResult(page);
  log.info({ result }, 'Video note publish complete');

  return result;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal helpers (shared patterns with publish.ts — kept separate to
|
||||
// avoid circular imports and keep each module self-contained)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Append each tag to the note by typing `#<tag>` into the content editor.
 *
 * After a tag is typed, the first suggestion item is clicked when one shows
 * up within 3 seconds; otherwise Enter is pressed to confirm the typed text.
 */
async function addTags(page: Page, tags: string[]): Promise<void> {
  for (const tagName of tags) {
    // Re-focus the editor before every tag; when it cannot be found, typing
    // still goes to whatever currently has focus.
    const editorHandle = await page.$(sel.contentEditor);
    if (editorHandle) {
      await editorHandle.click();
      await page.waitForTimeout(300);
    }

    await page.keyboard.type(`#${tagName}`, { delay: 50 });
    await page.waitForTimeout(800);

    const firstSuggestion = await page
      .waitForSelector(sel.tagSuggestionItem, { timeout: 3_000 })
      .catch(() => null);

    if (!firstSuggestion) {
      await page.keyboard.press('Enter');
    } else {
      await firstSuggestion.click();
    }

    await page.waitForTimeout(FIELD_SETTLE_MS);
  }
}
|
||||
|
||||
/**
 * Choose the note's visibility in the publish form.
 *
 * `'private'` and `'friends'` select their dedicated options; any other value
 * selects the public option. A missing visibility button or option is logged
 * at debug level and otherwise ignored.
 */
async function setVisibility(page: Page, visibility: string): Promise<void> {
  const opener = await page.$(sel.visibilityButton);
  if (!opener) {
    log.debug('Visibility button not found, skipping');
    return;
  }

  await opener.click();
  await page.waitForTimeout(FIELD_SETTLE_MS);

  const optionSelector: string =
    visibility === 'private'
      ? sel.visibilityPrivate
      : visibility === 'friends'
        ? sel.visibilityFriends
        : sel.visibilityPublic;

  const choice = await page.$(optionSelector);
  if (!choice) {
    log.debug({ visibility }, 'Visibility option not found');
    return;
  }

  await choice.click();
  await page.waitForTimeout(FIELD_SETTLE_MS);
}
|
||||
|
||||
/**
 * Open the schedule picker and type the requested publish time into it.
 *
 * The value is typed verbatim and confirmed with Enter. A missing schedule
 * button or input is logged at debug level and otherwise ignored.
 */
async function setSchedule(page: Page, scheduleAt: string): Promise<void> {
  const opener = await page.$(sel.scheduleButton);
  if (!opener) {
    log.debug('Schedule button not found, skipping');
    return;
  }

  await opener.click();
  await page.waitForTimeout(FIELD_SETTLE_MS);

  const timeField = await page.$(sel.scheduleInput);
  if (!timeField) {
    log.debug('Schedule input not found');
    return;
  }

  // Focus, clear, type, confirm.
  await timeField.click();
  await timeField.fill('');
  await page.keyboard.type(scheduleAt, { delay: 30 });
  await page.keyboard.press('Enter');
  await page.waitForTimeout(FIELD_SETTLE_MS);
}
|
||||
|
||||
/**
 * Wait for evidence that the publish succeeded.
 *
 * Two probes run concurrently for up to 30 seconds each: the page URL
 * matching the success pattern, and the success element appearing. The
 * publish counts as confirmed as soon as EITHER probe succeeds, so a positive
 * signal is not held back until the other probe times out. When neither
 * fires, the page HTML is searched for the success texts as a last resort.
 *
 * @param page - The page on which the publish button was just clicked.
 * @returns `{ success: false }` when no success evidence is found; otherwise
 *          `success: true` plus `noteId` when it can be read from the URL.
 */
async function waitForPublishResult(
  page: Page,
): Promise<{ success: boolean; noteId?: string }> {
  // Both probes are mapped to booleans and therefore never reject.
  const urlProbe = page
    .waitForURL(sel.publishSuccessUrlPattern, { timeout: 30_000 })
    .then(() => true)
    .catch(() => false);

  const elementProbe = page
    .waitForSelector(sel.publishSuccess, { timeout: 30_000 })
    .then(() => true)
    .catch(() => false);

  await page.waitForTimeout(PUBLISH_SETTLE_MS);

  // Resolve `true` on the first positive probe, `false` once all are negative.
  const confirmed = await new Promise<boolean>((resolve) => {
    const probes = [urlProbe, elementProbe];
    let unresolved = probes.length;
    for (const probe of probes) {
      void probe.then((ok) => {
        unresolved -= 1;
        if (ok) {
          resolve(true);
        } else if (unresolved === 0) {
          resolve(false);
        }
      });
    }
  });

  if (!confirmed) {
    // Fallback: look for the success texts anywhere in the rendered HTML.
    const html = await page.content();
    const hasSuccessText = html.includes('发布成功') || html.includes('已发布');

    if (!hasSuccessText) {
      log.warn('No success indicator found after video publish');
      return { success: false };
    }
  }

  const noteId = extractNoteIdFromUrl(page.url());

  return { success: true, noteId };
}
|
||||
|
||||
/**
 * Try to read a note / post ID out of the URL shown after a publish.
 *
 * A non-empty `noteId` query parameter wins; otherwise the first
 * `/note/<lowercase-hex>` path segment is used. Unparseable URLs and URLs
 * without an ID yield `undefined`.
 */
function extractNoteIdFromUrl(url: string): string | undefined {
  let target: URL;
  try {
    target = new URL(url);
  } catch {
    // Not a valid absolute URL; the ID is optional, so report "none".
    return undefined;
  }

  const fromQuery = target.searchParams.get('noteId');
  if (fromQuery) {
    return fromQuery;
  }

  const fromPath = /\/note\/([a-f0-9]+)/.exec(target.pathname);
  return fromPath?.[1] || undefined;
}
|
||||
@@ -0,0 +1,375 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import { XHS_SELECTORS } from './selectors.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Creator-center page that hosts the publish form. */
const CREATOR_PUBLISH_URL = 'https://creator.xiaohongshu.com/publish/publish';

/** Upper bound for waiting on image uploads (60 seconds). */
const UPLOAD_WAIT_TIMEOUT_MS = 60 * 1000;

/** Interval (ms) between checks for upload completion. */
const UPLOAD_POLL_INTERVAL_MS = 500;

/** Pause (ms) after the uploads finish so the UI can settle. */
const UPLOAD_SETTLE_MS = 1500;

/** Pause (ms) after filling a form field so debounce / auto-save can settle. */
const FIELD_SETTLE_MS = 500;

/** Pause (ms) after clicking publish before the result is checked. */
const PUBLISH_SETTLE_MS = 3000;

// Selectors of the publish form.
const { publish: sel } = XHS_SELECTORS;

/** Child logger tagged with this module's name. */
const log = logger.child({ module: 'xhs-publish' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// publishImageNote
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Optional settings accepted by `publishImageNote`. */
export interface PublishImageOptions {
  /** When true, the note is flagged as original content. */
  isOriginal?: boolean;

  /**
   * Visibility of the note. Any value other than `'public'` is applied to the
   * form; `'public'` (or omitting the field) leaves the form untouched.
   */
  visibility?: string;

  /** Publish time, forwarded to the schedule step when set. */
  scheduleAt?: string;

  /** Topic tags, forwarded to the tag step when non-empty. */
  tags?: string[];
}
|
||||
|
||||
/**
|
||||
* Publish an image note on Xiaohongshu via the creator center UI.
|
||||
*
|
||||
* @param page - Playwright Page managed by BrowserManager.
|
||||
* @param title - Note title (required, non-empty).
|
||||
* @param content - Note body / description text.
|
||||
* @param imagePaths - Array of local file paths (already validated by caller).
|
||||
* @param options - Optional tags, schedule, original flag, and visibility.
|
||||
* @returns Object indicating success and an optional noteId if detectable.
|
||||
*/
|
||||
export async function publishImageNote(
|
||||
page: Page,
|
||||
title: string,
|
||||
content: string,
|
||||
imagePaths: string[],
|
||||
options?: PublishImageOptions,
|
||||
): Promise<{ success: boolean; noteId?: string }> {
|
||||
log.info(
|
||||
{ imageCount: imagePaths.length, hasOptions: !!options },
|
||||
'Starting image note publish',
|
||||
);
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// 1. Navigate to the creator publish page
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
await page.goto(CREATOR_PUBLISH_URL, { waitUntil: 'domcontentloaded' });
|
||||
// Allow the SPA to hydrate.
|
||||
await page.waitForTimeout(2_000);
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// 2. Upload images via the file input
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
const fileInput = await page.waitForSelector(sel.imageFileInput, {
|
||||
timeout: 10_000,
|
||||
});
|
||||
|
||||
// Playwright's setInputFiles supports multiple files at once.
|
||||
await fileInput.setInputFiles(imagePaths);
|
||||
|
||||
log.debug({ count: imagePaths.length }, 'Files set on input element');
|
||||
|
||||
// Wait for all image thumbnails to appear (one per uploaded image).
|
||||
// Poll using page.$$ (Node-side API) to avoid needing browser-context
|
||||
// DOM types which are not available in our TypeScript lib config.
|
||||
await waitForUploadedImages(page, imagePaths.length);
|
||||
|
||||
// Give the UI a moment to settle after all uploads.
|
||||
await page.waitForTimeout(UPLOAD_SETTLE_MS);
|
||||
|
||||
log.debug('All images uploaded successfully');
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// 3. Fill in the title
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
const titleInput = await page.waitForSelector(sel.titleInput, {
|
||||
timeout: 5_000,
|
||||
});
|
||||
await titleInput.click();
|
||||
await titleInput.fill('');
|
||||
await page.keyboard.type(title, { delay: 30 });
|
||||
await page.waitForTimeout(FIELD_SETTLE_MS);
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// 4. Fill in the content / description
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
const contentEditor = await page.waitForSelector(sel.contentEditor, {
|
||||
timeout: 5_000,
|
||||
});
|
||||
await contentEditor.click();
|
||||
await page.keyboard.type(content, { delay: 20 });
|
||||
await page.waitForTimeout(FIELD_SETTLE_MS);
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// 5. Add tags (optional)
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
if (options?.tags && options.tags.length > 0) {
|
||||
await addTags(page, options.tags);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// 6. Set original flag (optional)
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
if (options?.isOriginal) {
|
||||
await setOriginal(page);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// 7. Set visibility (optional)
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
if (options?.visibility && options.visibility !== 'public') {
|
||||
await setVisibility(page, options.visibility);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// 8. Set schedule (optional)
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
if (options?.scheduleAt) {
|
||||
await setSchedule(page, options.scheduleAt);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// 9. Click the publish button
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
const publishBtn = await page.waitForSelector(sel.publishButton, {
|
||||
timeout: 5_000,
|
||||
});
|
||||
await publishBtn.click();
|
||||
|
||||
log.debug('Publish button clicked, waiting for success');
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// 10. Wait for success indicator
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
const result = await waitForPublishResult(page);
|
||||
|
||||
log.info({ result }, 'Image note publish complete');
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Block until at least `expectedCount` uploaded-image thumbnails are present.
 *
 * The page is re-queried through `page.$$` (a Node-side call, so no
 * browser-context DOM typings are required) once every
 * `UPLOAD_POLL_INTERVAL_MS` until the count is reached or
 * `UPLOAD_WAIT_TIMEOUT_MS` has elapsed since the call started.
 *
 * @param page - Page on which the thumbnails are expected to appear.
 * @param expectedCount - Minimum number of thumbnails that must be present.
 * @throws Error when the time budget runs out before enough thumbnails show up.
 */
async function waitForUploadedImages(
  page: Page,
  expectedCount: number,
): Promise<void> {
  const startedAt = Date.now();

  while (Date.now() - startedAt < UPLOAD_WAIT_TIMEOUT_MS) {
    const thumbnails = await page.$$(sel.uploadedImageItem);

    if (thumbnails.length < expectedCount) {
      // Not there yet: pause, then re-check the budget and poll again.
      await page.waitForTimeout(UPLOAD_POLL_INTERVAL_MS);
      continue;
    }

    return;
  }

  throw new Error(
    `Timed out waiting for ${String(expectedCount)} uploaded images after ${String(UPLOAD_WAIT_TIMEOUT_MS)}ms`,
  );
}
|
||||
|
||||
/**
 * Add hashtag / topic tags by typing them into the content editor.
 *
 * For each tag the editor is focused, `#` plus the tag text is typed (which
 * triggers the topic dropdown), and then either the first suggestion is
 * clicked or, if none shows up within 3 seconds, Enter is pressed.
 *
 * Tags are normalised before typing: surrounding whitespace and any leading
 * `#` characters supplied by the caller are removed so that `"#travel"` is
 * not typed as `##travel`. Tags that are empty after normalisation are
 * skipped, because typing a bare `#` could attach an arbitrary suggestion.
 *
 * @param page - Page showing the publish form.
 * @param tags - Tag texts, with or without a leading `#`.
 */
async function addTags(page: Page, tags: string[]): Promise<void> {
  for (const rawTag of tags) {
    const tag = rawTag.trim().replace(/^#+/, '').trim();
    if (tag === '') {
      continue;
    }

    // Click the content editor to ensure we're in the right context,
    // then type `#` + tag text which triggers the topic selector.
    const editor = await page.$(sel.contentEditor);
    if (editor) {
      await editor.click();
      await page.waitForTimeout(300);
    }

    // Type the hashtag prefix which triggers the topic dropdown.
    await page.keyboard.type(`#${tag}`, { delay: 50 });
    await page.waitForTimeout(800);

    // Try to click the first suggestion item; if not available, press Enter.
    const suggestion = await page
      .waitForSelector(sel.tagSuggestionItem, { timeout: 3_000 })
      .catch(() => null);

    if (suggestion) {
      await suggestion.click();
    } else {
      await page.keyboard.press('Enter');
    }

    await page.waitForTimeout(FIELD_SETTLE_MS);
  }
}
|
||||
|
||||
/**
 * Tick the "original content" checkbox when it is present and not yet ticked.
 *
 * A missing checkbox is not an error: it is logged at debug level and the
 * function returns without touching the page.
 *
 * @param page - Page showing the publish form.
 */
async function setOriginal(page: Page): Promise<void> {
  const originalToggle = await page.$(sel.originalCheckbox);

  if (!originalToggle) {
    log.debug('Original checkbox not found, skipping');
    return;
  }

  const alreadyTicked = await originalToggle.isChecked();
  if (alreadyTicked) {
    return;
  }

  await originalToggle.click();
  await page.waitForTimeout(FIELD_SETTLE_MS);
}
|
||||
|
||||
/**
 * Set the note visibility (private or friends-only).
 *
 * Opens the visibility control and clicks the option matching `visibility`.
 * Any value other than `'private'` or `'friends'` selects the public option.
 *
 * This function fails loudly instead of skipping: the caller clicks the
 * publish button right after it returns, so silently ignoring a missing
 * control would publish a note with a wider audience than was requested.
 *
 * @param page - Page showing the publish form.
 * @param visibility - Requested visibility (`'private'`, `'friends'`, or other for public).
 * @throws Error when the visibility control or the requested option cannot be found.
 */
async function setVisibility(page: Page, visibility: string): Promise<void> {
  const visBtn = await page.$(sel.visibilityButton);
  if (!visBtn) {
    throw new Error(
      `Visibility control not found; cannot apply "${visibility}" visibility`,
    );
  }

  await visBtn.click();
  await page.waitForTimeout(FIELD_SETTLE_MS);

  let optionSelector: string;
  switch (visibility) {
    case 'private':
      optionSelector = sel.visibilityPrivate;
      break;
    case 'friends':
      optionSelector = sel.visibilityFriends;
      break;
    default:
      optionSelector = sel.visibilityPublic;
      break;
  }

  // The option is rendered only after the control is opened, so give it a
  // short grace period instead of a single immediate lookup.
  const option = await page
    .waitForSelector(optionSelector, { timeout: 3_000 })
    .catch(() => null);

  if (!option) {
    throw new Error(
      `Visibility option "${visibility}" not found; refusing to continue with unconfirmed visibility`,
    );
  }

  await option.click();
  await page.waitForTimeout(FIELD_SETTLE_MS);
}
|
||||
|
||||
/**
 * Open the schedule / timing picker and set the publish date.
 *
 * Clicks the schedule control, clears the date input, types `scheduleAt`
 * verbatim and confirms with Enter.
 *
 * This function fails loudly instead of skipping: the caller clicks the
 * publish button right after it returns, so silently ignoring a missing
 * control would publish the note immediately rather than at the requested time.
 *
 * NOTE(review): the text is typed exactly as received; whether the picker
 * accepts the caller's date format is not verified here — confirm against the UI.
 *
 * @param page - Page showing the publish form.
 * @param scheduleAt - Date/time text to enter into the schedule input.
 * @throws Error when the schedule control or its input cannot be found.
 */
async function setSchedule(page: Page, scheduleAt: string): Promise<void> {
  const scheduleBtn = await page.$(sel.scheduleButton);
  if (!scheduleBtn) {
    throw new Error('Schedule control not found; cannot schedule the note');
  }

  await scheduleBtn.click();
  await page.waitForTimeout(FIELD_SETTLE_MS);

  // The input is rendered only after the control is opened, so give it a
  // short grace period instead of a single immediate lookup.
  const scheduleInput = await page
    .waitForSelector(sel.scheduleInput, { timeout: 3_000 })
    .catch(() => null);

  if (!scheduleInput) {
    throw new Error('Schedule input not found; cannot schedule the note');
  }

  await scheduleInput.click();
  await scheduleInput.fill('');
  await page.keyboard.type(scheduleAt, { delay: 30 });
  await page.keyboard.press('Enter');
  await page.waitForTimeout(FIELD_SETTLE_MS);
}
|
||||
|
||||
/**
 * Resolve to `true` as soon as any probe resolves to `true`, or to `false`
 * once every probe has settled without doing so. A rejected probe counts
 * as `false`.
 */
function anyProbeSucceeded(
  probes: readonly Promise<boolean>[],
): Promise<boolean> {
  return new Promise<boolean>((resolve) => {
    let pending = probes.length;
    if (pending === 0) {
      resolve(false);
      return;
    }

    const onSettled = (ok: boolean): void => {
      pending -= 1;
      if (ok) {
        resolve(true);
      } else if (pending === 0) {
        resolve(false);
      }
    };

    for (const probe of probes) {
      void probe.then(onSettled, () => {
        onSettled(false);
      });
    }
  });
}

/**
 * Wait for the publish success indicator (URL change or success element).
 *
 * Two probes run concurrently, each with a 30 second timeout: one waits for
 * the URL to match the success pattern, the other for the success element.
 * The first positive probe is enough. Waiting for both (as `Promise.all`
 * would) stalls a successful publish until the other probe times out
 * whenever only one of the indicators shows up.
 *
 * If neither probe succeeds, the page HTML is checked for known success
 * texts as a final fallback.
 *
 * @param page - Page on which the publish button has just been clicked.
 * @returns `{ success: false }` when no indicator is found; otherwise
 *   `{ success: true, noteId }` where `noteId` is taken from the current URL
 *   when it can be extracted.
 */
async function waitForPublishResult(
  page: Page,
): Promise<{ success: boolean; noteId?: string }> {
  // Both probes swallow their own timeout and report a plain boolean.
  const urlChangePromise = page
    .waitForURL(sel.publishSuccessUrlPattern, { timeout: 30_000 })
    .then(() => true)
    .catch(() => false);

  const successElementPromise = page
    .waitForSelector(sel.publishSuccess, { timeout: 30_000 })
    .then(() => true)
    .catch(() => false);

  // Also wait a short baseline for the button click to process.
  await page.waitForTimeout(PUBLISH_SETTLE_MS);

  const indicatorSeen = await anyProbeSucceeded([
    urlChangePromise,
    successElementPromise,
  ]);

  if (!indicatorSeen) {
    // Final fallback: check if the page content indicates success.
    const pageContent = await page.content();
    const hasSuccessText =
      pageContent.includes('发布成功') || pageContent.includes('已发布');

    if (!hasSuccessText) {
      log.warn('No success indicator found after publish');
      return { success: false };
    }
  }

  // Try to extract the note ID from the current URL if available.
  const noteId = extractNoteIdFromUrl(page.url());

  return { success: true, noteId };
}
|
||||
|
||||
/**
 * Best-effort extraction of a note / post ID from a post-publish URL.
 *
 * Two shapes are recognised, checked in this order:
 *   1. a non-empty `noteId` query parameter (`/publish/success?noteId=xxx`);
 *   2. a `/note/<id>` path segment, where `<id>` is a run of `a-f` / `0-9`.
 *
 * @param url - Absolute URL to inspect.
 * @returns The ID, or `undefined` when the URL cannot be parsed or matches
 *   neither shape (the note ID is optional for callers).
 */
function extractNoteIdFromUrl(url: string): string | undefined {
  let target: URL;
  try {
    target = new URL(url);
  } catch {
    // Unparseable URL: not a problem, the note ID is optional.
    return undefined;
  }

  const fromQuery = target.searchParams.get('noteId');
  if (fromQuery) {
    return fromQuery;
  }

  const fromPath = /\/note\/([a-f0-9]+)/.exec(target.pathname);
  return fromPath?.[1] || undefined;
}
|
||||
@@ -0,0 +1,590 @@
|
||||
import { Router } from 'express';
|
||||
import { z, ZodError } from 'zod';
|
||||
|
||||
import type { BrowserManager } from '../../browser/manager.js';
|
||||
import { config } from '../../config/index.js';
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import { classifyError, sanitizeErrorMessage } from '../../utils/errors.js';
|
||||
import { validateMediaPath } from '../../utils/downloader.js';
|
||||
import { rateLimiter } from '../../server/middleware.js';
|
||||
|
||||
import { checkLoginStatus, getLoginQRCode, deleteCookies } from './login.js';
|
||||
import { listFeeds } from './feeds.js';
|
||||
import { searchFeeds } from './search.js';
|
||||
import { getFeedDetail } from './feed-detail.js';
|
||||
import { getUserProfile } from './user-profile.js';
|
||||
import { publishImageNote } from './publish.js';
|
||||
import { publishVideoNote } from './publish-video.js';
|
||||
import { postComment, replyComment } from './comment.js';
|
||||
import { toggleLike, toggleFavorite } from './interaction.js';
|
||||
|
||||
import {
|
||||
SearchSchema,
|
||||
GetFeedDetailSchema,
|
||||
GetUserProfileSchema,
|
||||
PublishImageSchema,
|
||||
PublishVideoSchema,
|
||||
PostCommentSchema,
|
||||
ReplyCommentSchema,
|
||||
LikeSchema,
|
||||
FavoriteSchema,
|
||||
} from './schemas.js';
|
||||
|
||||
import type { SearchFilters } from './types.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Platform key passed to `BrowserManager.withPage` by every route in this file. */
const PLATFORM = 'xiaohongshu';

/** Upper bound, in megabytes, applied when validating a video path (500 MB). */
const VIDEO_MAX_SIZE_MB = 500;

/** Upper bound, in megabytes, applied when validating each image path (20 MB). */
const IMAGE_MAX_SIZE_MB = 20;

/** Child logger tagged with this module's name. */
const log = logger.child({ module: 'xhs-routes' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Zod schemas for REST body validation
|
||||
//
|
||||
// The MCP schemas in schemas.ts are "shape" objects (plain objects with zod
|
||||
// fields). For REST validation we wrap them in z.object() where needed.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Body of `POST /search`. */
const SearchBodySchema = z.object({
  keyword: SearchSchema.keyword,
  filters: SearchSchema.filters,
});

/** Body of `POST /feeds/detail`. */
const FeedDetailBodySchema = z.object({
  feed_id: GetFeedDetailSchema.feed_id,
  xsec_token: GetFeedDetailSchema.xsec_token,
  load_all_comments: GetFeedDetailSchema.load_all_comments,
});

/** Body of `POST /user/profile`. */
const UserProfileBodySchema = z.object({
  user_id: GetUserProfileSchema.user_id,
  xsec_token: GetUserProfileSchema.xsec_token,
});

/** Body of `POST /publish/image`. */
const PublishImageBodySchema = z.object({
  title: PublishImageSchema.title,
  content: PublishImageSchema.content,
  images: PublishImageSchema.images,
  tags: PublishImageSchema.tags,
  schedule_at: PublishImageSchema.schedule_at,
  is_original: PublishImageSchema.is_original,
  visibility: PublishImageSchema.visibility,
});

/** Body of `POST /publish/video`. */
const PublishVideoBodySchema = z.object({
  title: PublishVideoSchema.title,
  content: PublishVideoSchema.content,
  video: PublishVideoSchema.video,
  tags: PublishVideoSchema.tags,
  schedule_at: PublishVideoSchema.schedule_at,
  visibility: PublishVideoSchema.visibility,
});

/** Body of `POST /comment`. */
const PostCommentBodySchema = z.object({
  feed_id: PostCommentSchema.feed_id,
  xsec_token: PostCommentSchema.xsec_token,
  content: PostCommentSchema.content,
});

/** Body of `POST /comment/reply`. */
const ReplyCommentBodySchema = z.object({
  feed_id: ReplyCommentSchema.feed_id,
  xsec_token: ReplyCommentSchema.xsec_token,
  content: ReplyCommentSchema.content,
  comment_id: ReplyCommentSchema.comment_id,
  user_id: ReplyCommentSchema.user_id,
});

/** Body of `POST /like`. */
const LikeBodySchema = z.object({
  feed_id: LikeSchema.feed_id,
  xsec_token: LikeSchema.xsec_token,
  unlike: LikeSchema.unlike,
});

/** Body of `POST /favorite`. */
const FavoriteBodySchema = z.object({
  feed_id: FavoriteSchema.feed_id,
  xsec_token: FavoriteSchema.xsec_token,
  unfavorite: FavoriteSchema.unfavorite,
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Standard JSON response helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Envelope sent when a request succeeds. */
interface ApiSuccessResponse<T> {
  success: true;
  data: T;
}

/** Envelope sent when a request fails. */
interface ApiErrorResponse {
  success: false;
  error: {
    code: string;
    message: string;
  };
}

/** Either envelope; discriminated by the `success` flag. */
type ApiResponse<T> = ApiSuccessResponse<T> | ApiErrorResponse;

/**
 * Wrap a payload in the success envelope.
 *
 * @param data - Payload to return to the client.
 */
function successResponse<T>(data: T): ApiSuccessResponse<T> {
  const envelope: ApiSuccessResponse<T> = { success: true, data };
  return envelope;
}

/**
 * Build the failure envelope.
 *
 * @param code - Machine-readable error code.
 * @param message - Human-readable description.
 */
function errorResponse(code: string, message: string): ApiErrorResponse {
  const error = { code, message };
  return { success: false, error };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Rate limiters
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Window length, in milliseconds, shared by both limiters below. */
const RATE_LIMIT_WINDOW_MS = 60_000;

/** Limiter attached to the read-only routes: 60 requests per window. */
const readRateLimiter = rateLimiter({
  windowMs: RATE_LIMIT_WINDOW_MS,
  maxRequests: 60,
});

/** Limiter attached to the mutating routes: 10 requests per window. */
const writeRateLimiter = rateLimiter({
  windowMs: RATE_LIMIT_WINDOW_MS,
  maxRequests: 10,
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Route factory
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Key type accepted when indexing `config.operationTimeouts`. */
type OperationTimeoutKey = keyof typeof config.operationTimeouts;

/**
 * Resolve the timeout for an operation.
 *
 * Lookup order: the operation's own entry in `config.operationTimeouts`,
 * then the `'default'` entry, then `fallbackMs`.
 *
 * @param operation - Key of the operation in `config.operationTimeouts`.
 * @param fallbackMs - Value used when neither entry is configured.
 */
function resolveTimeoutMs(
  operation: OperationTimeoutKey,
  fallbackMs: number,
): number {
  return (
    config.operationTimeouts[operation] ??
    config.operationTimeouts['default'] ??
    fallbackMs
  );
}

/**
 * Turn an async action into an Express handler.
 *
 * The action's resolved value is sent inside the standard success envelope;
 * anything it throws (including `ZodError` from body parsing) is passed to
 * `handleError`. The promise is deliberately detached with `void` because
 * Express does not await handlers.
 *
 * @param action - Produces the payload for the request.
 */
function jsonRoute(
  action: (req: import('express').Request) => Promise<unknown>,
): import('express').RequestHandler {
  return (req, res) => {
    void (async () => {
      try {
        const payload: ApiResponse<unknown> = successResponse(
          await action(req),
        );
        res.json(payload);
      } catch (err) {
        handleError(res, err);
      }
    })();
  };
}

/**
 * Create and return an Express Router with all Xiaohongshu REST API routes.
 *
 * Every handler calls the SAME action functions used by the MCP tools so
 * that business logic is never duplicated. Read routes use
 * `readRateLimiter`, mutating routes use `writeRateLimiter`.
 *
 * @param browser - Browser manager used to run page-based actions.
 * @returns The configured router.
 */
export function createXhsRoutes(browser: BrowserManager): Router {
  const router = Router();

  // =========================================================================
  // Login management
  // =========================================================================

  // GET /login/status
  router.get(
    '/login/status',
    readRateLimiter,
    jsonRoute(async () =>
      browser.withPage(
        PLATFORM,
        async (page) => checkLoginStatus(page),
        resolveTimeoutMs('login', 60_000),
      ),
    ),
  );

  // GET /login/qrcode
  router.get(
    '/login/qrcode',
    readRateLimiter,
    jsonRoute(async () => getLoginQRCode(browser)),
  );

  // DELETE /login/cookies
  router.delete(
    '/login/cookies',
    writeRateLimiter,
    jsonRoute(async () => {
      await deleteCookies(browser);
      return { message: 'Cookies deleted' };
    }),
  );

  // =========================================================================
  // Content browsing
  // =========================================================================

  // GET /feeds
  router.get(
    '/feeds',
    readRateLimiter,
    jsonRoute(async () =>
      browser.withPage(
        PLATFORM,
        async (page) => listFeeds(page),
        resolveTimeoutMs('feed_list', 60_000),
      ),
    ),
  );

  // POST /search
  router.post(
    '/search',
    readRateLimiter,
    jsonRoute(async (req) => {
      const body = SearchBodySchema.parse(req.body);

      const filters: SearchFilters | undefined = body.filters
        ? {
            sort: body.filters.sort,
            type: body.filters.type,
            time: body.filters.time,
          }
        : undefined;

      return browser.withPage(
        PLATFORM,
        async (page) => searchFeeds(page, body.keyword, filters),
        resolveTimeoutMs('search', 60_000),
      );
    }),
  );

  // POST /feeds/detail
  router.post(
    '/feeds/detail',
    readRateLimiter,
    jsonRoute(async (req) => {
      const body = FeedDetailBodySchema.parse(req.body);

      return browser.withPage(
        PLATFORM,
        async (page) =>
          getFeedDetail(
            page,
            body.feed_id,
            body.xsec_token,
            body.load_all_comments,
          ),
        resolveTimeoutMs('feed_detail', 60_000),
      );
    }),
  );

  // POST /user/profile
  router.post(
    '/user/profile',
    readRateLimiter,
    jsonRoute(async (req) => {
      const body = UserProfileBodySchema.parse(req.body);

      return browser.withPage(
        PLATFORM,
        async (page) => getUserProfile(page, body.user_id, body.xsec_token),
        resolveTimeoutMs('user_profile', 60_000),
      );
    }),
  );

  // =========================================================================
  // Content publishing
  // =========================================================================

  // POST /publish/image
  router.post(
    '/publish/image',
    writeRateLimiter,
    jsonRoute(async (req) => {
      const body = PublishImageBodySchema.parse(req.body);

      // Validate all image paths before acquiring a browser page. Kept
      // sequential so the first offending path is the one reported.
      const validatedPaths: string[] = [];
      for (const imagePath of body.images) {
        const resolved = await validateMediaPath(imagePath, {
          maxSizeMB: IMAGE_MAX_SIZE_MB,
        });
        validatedPaths.push(resolved);
      }

      return browser.withPage(
        PLATFORM,
        async (page) =>
          publishImageNote(page, body.title, body.content, validatedPaths, {
            tags: body.tags,
            scheduleAt: body.schedule_at,
            isOriginal: body.is_original,
            visibility: body.visibility,
          }),
        resolveTimeoutMs('publish', 300_000),
      );
    }),
  );

  // POST /publish/video
  router.post(
    '/publish/video',
    writeRateLimiter,
    jsonRoute(async (req) => {
      const body = PublishVideoBodySchema.parse(req.body);

      // Validate the video path before acquiring a browser page.
      const validatedPath = await validateMediaPath(body.video, {
        maxSizeMB: VIDEO_MAX_SIZE_MB,
      });

      return browser.withPage(
        PLATFORM,
        async (page) =>
          publishVideoNote(page, body.title, body.content, validatedPath, {
            tags: body.tags,
            scheduleAt: body.schedule_at,
            visibility: body.visibility,
          }),
        resolveTimeoutMs('publish', 300_000),
      );
    }),
  );

  // =========================================================================
  // Interactions
  // =========================================================================

  // POST /comment
  router.post(
    '/comment',
    writeRateLimiter,
    jsonRoute(async (req) => {
      const body = PostCommentBodySchema.parse(req.body);

      return browser.withPage(
        PLATFORM,
        async (page) =>
          postComment(page, body.feed_id, body.xsec_token, body.content),
        resolveTimeoutMs('comment', 20_000),
      );
    }),
  );

  // POST /comment/reply
  router.post(
    '/comment/reply',
    writeRateLimiter,
    jsonRoute(async (req) => {
      const body = ReplyCommentBodySchema.parse(req.body);

      return browser.withPage(
        PLATFORM,
        async (page) =>
          replyComment(
            page,
            body.feed_id,
            body.xsec_token,
            body.content,
            body.comment_id,
            body.user_id,
          ),
        resolveTimeoutMs('reply', 20_000),
      );
    }),
  );

  // POST /like
  router.post(
    '/like',
    writeRateLimiter,
    jsonRoute(async (req) => {
      const body = LikeBodySchema.parse(req.body);

      return browser.withPage(
        PLATFORM,
        async (page) =>
          toggleLike(page, body.feed_id, body.xsec_token, body.unlike),
        resolveTimeoutMs('like', 15_000),
      );
    }),
  );

  // POST /favorite
  router.post(
    '/favorite',
    writeRateLimiter,
    jsonRoute(async (req) => {
      const body = FavoriteBodySchema.parse(req.body);

      return browser.withPage(
        PLATFORM,
        async (page) =>
          toggleFavorite(
            page,
            body.feed_id,
            body.xsec_token,
            body.unfavorite,
          ),
        resolveTimeoutMs('favorite', 15_000),
      );
    }),
  );

  return router;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Error handling helper
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Unified error handler for REST route handlers.
 *
 * - ZodError        -> 400 with VALIDATION_ERROR
 * - AUTH_REQUIRED   -> 401
 * - TIMEOUT         -> 504
 * - NETWORK         -> 502
 * - anything else   -> 500
 *
 * The category comes from `classifyError()` and the client-facing message is
 * passed through `sanitizeErrorMessage()`.
 *
 * If the response has already started (`res.headersSent`), nothing more can
 * be written: the error is only logged. Without this guard a second write
 * would throw inside the detached async handler and surface as an unhandled
 * promise rejection.
 *
 * @param res - Express response for the failing request.
 * @param err - Whatever was thrown by the route handler.
 */
function handleError(res: import('express').Response, err: unknown): void {
  if (res.headersSent) {
    log.error({ err }, 'REST API handler error after response was sent');
    return;
  }

  if (err instanceof ZodError) {
    const issues = err.issues
      .map((issue) => {
        // Root-level issues (e.g. a missing body) have an empty path; label
        // them explicitly instead of emitting a bare ": message".
        const field = issue.path.length > 0 ? issue.path.join('.') : '(body)';
        return `${field}: ${issue.message}`;
      })
      .join('; ');
    res.status(400).json(errorResponse('VALIDATION_ERROR', issues));
    return;
  }

  const error = err instanceof Error ? err : new Error(String(err));
  const category = classifyError(error);
  const message = sanitizeErrorMessage(error.message);

  log.error({ err: error, category }, 'REST API handler error');

  let statusCode: number;
  switch (category) {
    case 'AUTH_REQUIRED':
      statusCode = 401;
      break;
    case 'TIMEOUT':
      statusCode = 504;
      break;
    case 'NETWORK':
      statusCode = 502;
      break;
    default:
      statusCode = 500;
      break;
  }

  res.status(statusCode).json(errorResponse(category, message));
}
|
||||
@@ -0,0 +1,148 @@
|
||||
import { z } from 'zod';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// MCP tool parameter schemas for all 13 Xiaohongshu tools.
|
||||
//
|
||||
// Phase 2 tools (login) have no parameters — their schemas are empty objects.
|
||||
// Phase 3/4 schemas are defined here so that the full tool surface is
|
||||
// established upfront and types can be inferred with z.infer<>.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// -- Phase 2: Login management (3 tools) -----------------------------------
|
||||
|
||||
/** xhs_check_login — no parameters. */
|
||||
export const CheckLoginSchema = {};
|
||||
|
||||
/** xhs_get_login_qrcode — no parameters. */
|
||||
export const GetLoginQRCodeSchema = {};
|
||||
|
||||
/** xhs_delete_cookies — no parameters. */
|
||||
export const DeleteCookiesSchema = {};
|
||||
|
||||
// -- Phase 3: Content browsing (4 tools) -----------------------------------
|
||||
|
||||
/** xhs_list_feeds — no parameters. */
|
||||
export const ListFeedsSchema = {};
|
||||
|
||||
/** xhs_search */
|
||||
export const SearchSchema = {
|
||||
keyword: z.string().describe('Search keyword'),
|
||||
filters: z
|
||||
.object({
|
||||
sort: z
|
||||
.enum(['general', 'time_descending', 'popularity_descending'])
|
||||
.optional()
|
||||
.describe('Sort order'),
|
||||
type: z
|
||||
.enum(['all', 'note', 'video'])
|
||||
.optional()
|
||||
.describe('Content type filter'),
|
||||
time: z
|
||||
.enum(['all', 'day', 'week', 'half_year'])
|
||||
.optional()
|
||||
.describe('Time range filter'),
|
||||
})
|
||||
.optional()
|
||||
.describe('Optional search filters'),
|
||||
};
|
||||
|
||||
/** xhs_get_feed_detail */
|
||||
export const GetFeedDetailSchema = {
|
||||
feed_id: z.string().describe('Feed (note) ID'),
|
||||
xsec_token: z.string().describe('Security token for the feed'),
|
||||
load_all_comments: z
|
||||
.boolean()
|
||||
.optional()
|
||||
.default(false)
|
||||
.describe('Whether to scroll and load all comments'),
|
||||
};
|
||||
|
||||
/** xhs_get_user_profile */
|
||||
export const GetUserProfileSchema = {
|
||||
user_id: z.string().describe('User ID'),
|
||||
xsec_token: z.string().describe('Security token for the user page'),
|
||||
};
|
||||
|
||||
// -- Phase 4: Content publishing (2 tools) ---------------------------------
|
||||
|
||||
/**
 * Input shape for `xhs_publish_image`.
 *
 * Requires a non-empty `title`, a `content` string and at least one entry in
 * `images`. `is_original` defaults to `false` and `visibility` to `'public'`.
 * `schedule_at` is validated only as a string; the ISO 8601 format named in
 * its description is not enforced by this schema.
 */
export const PublishImageSchema = {
  title: z.string().min(1).describe('Note title'),
  content: z.string().describe('Note body text'),
  images: z
    .array(z.string())
    .min(1)
    .describe('Array of image file paths or URLs'),
  tags: z.array(z.string()).optional().describe('Hashtags to attach'),
  schedule_at: z
    .string()
    .optional()
    .describe('ISO 8601 datetime for scheduled publishing'),
  is_original: z
    .boolean()
    .optional()
    .default(false)
    .describe('Mark as original content'),
  visibility: z
    .enum(['public', 'private', 'friends'])
    .optional()
    .default('public')
    .describe('Visibility setting'),
};
|
||||
|
||||
/**
 * Input shape for `xhs_publish_video`.
 *
 * Requires a non-empty `title`, a `content` string and a `video` path or URL.
 * `visibility` defaults to `'public'`. `schedule_at` is validated only as a
 * string; the ISO 8601 format named in its description is not enforced here.
 */
export const PublishVideoSchema = {
  title: z.string().min(1).describe('Note title'),
  content: z.string().describe('Note body text'),
  video: z.string().describe('Video file path or URL'),
  tags: z.array(z.string()).optional().describe('Hashtags to attach'),
  schedule_at: z
    .string()
    .optional()
    .describe('ISO 8601 datetime for scheduled publishing'),
  visibility: z
    .enum(['public', 'private', 'friends'])
    .optional()
    .default('public')
    .describe('Visibility setting'),
};
|
||||
|
||||
// -- Phase 4: Interactions (4 tools) ---------------------------------------
|
||||
|
||||
/** Input shape for `xhs_post_comment`; `content` must be non-empty. */
export const PostCommentSchema = {
  feed_id: z.string().describe('Feed ID to comment on'),
  xsec_token: z.string().describe('Security token for the feed'),
  content: z.string().min(1).describe('Comment text'),
};
|
||||
|
||||
/**
 * Input shape for `xhs_reply_comment`; `content` must be non-empty.
 *
 * NOTE(review): `comment_id` and `user_id` are both optional here, so this
 * schema accepts a reply with no target at all — confirm that the tool
 * handler rejects that case.
 */
export const ReplyCommentSchema = {
  feed_id: z.string().describe('Feed ID'),
  xsec_token: z.string().describe('Security token for the feed'),
  comment_id: z.string().optional().describe('Comment ID to reply to'),
  user_id: z.string().optional().describe('User ID of the comment author'),
  content: z.string().min(1).describe('Reply text'),
};
|
||||
|
||||
/**
 * Input shape for `xhs_like`.
 *
 * `unlike` may be omitted and then defaults to `false`.
 */
export const LikeSchema = {
  feed_id: z.string().describe('Feed ID to like'),
  xsec_token: z.string().describe('Security token for the feed'),
  unlike: z
    .boolean()
    .optional()
    .default(false)
    .describe('Set to true to unlike'),
};
|
||||
|
||||
/**
 * Input shape for `xhs_favorite`.
 *
 * `unfavorite` may be omitted and then defaults to `false`.
 */
export const FavoriteSchema = {
  feed_id: z.string().describe('Feed ID to favorite'),
  xsec_token: z.string().describe('Security token for the feed'),
  unfavorite: z
    .boolean()
    .optional()
    .default(false)
    .describe('Set to true to unfavorite'),
};
|
||||
@@ -0,0 +1,387 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import { extractInitialState, parseCountString, ensureHttps } from './feeds.js';
|
||||
import type { Feed, SearchFilters } from './types.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** URL of the Xiaohongshu search results page; query parameters are appended to it. */
const SEARCH_BASE_URL = 'https://www.xiaohongshu.com/search_result';

/** Child logger whose bindings tag entries with `module: 'xhs-search'`. */
const log = logger.child({ module: 'xhs-search' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Sort value mapping
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Maps the public `sort` filter values to the value placed in the `sort`
 * URL query parameter. Read-only lookup table.
 */
const SORT_PARAM: Readonly<Record<string, string>> = {
  general: '0',
  time_descending: '1',
  popularity_descending: '2',
};
|
||||
|
||||
/**
 * Maps the public `type` filter values to the value placed in the `type`
 * URL query parameter. Read-only lookup table.
 */
const TYPE_PARAM: Readonly<Record<string, string>> = {
  all: '0',
  note: '1',
  video: '2',
};
|
||||
|
||||
/**
 * Maps the public `time` filter values to the value placed in the `time`
 * URL query parameter. Read-only lookup table.
 */
const TIME_PARAM: Readonly<Record<string, string>> = {
  all: '0',
  day: '1',
  week: '2',
  half_year: '3',
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// __INITIAL_STATE__ raw types for search results
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Loosely-typed shape of one search result entry as found in the page's
 * `__INITIAL_STATE__`.
 *
 * Every field is optional, and several are declared under more than one
 * spelling (camelCase and snake_case) because `parseRawSearchItem` reads
 * whichever is present. Values may also be nested under `noteCard`.
 */
interface RawSearchFeedItem {
  // Identity
  id?: string;
  noteId?: string;
  note_id?: string;
  xsecToken?: string;
  xsec_token?: string;

  // Title / description candidates
  displayTitle?: string;
  display_title?: string;
  title?: string;
  name?: string;
  desc?: string;
  description?: string;

  /** Nested card that may carry the same data as the top-level fields. */
  noteCard?: RawSearchNoteCard;

  // Content type candidates; a value containing "video" marks a video note.
  type?: string;
  model_type?: string;

  cover?: RawSearchImage;
  user?: RawSearchUser;

  // Like-count candidates, nested or flat.
  interactInfo?: RawSearchInteractInfo;
  interact_info?: RawSearchInteractInfo;
  likedCount?: string;
  liked_count?: string;
}
|
||||
|
||||
/**
 * Loosely-typed shape of the `noteCard` object nested inside a raw search
 * result entry. All fields are optional; `parseRawSearchItem` consults them
 * only when the corresponding top-level field is absent.
 */
interface RawSearchNoteCard {
  noteId?: string;
  displayTitle?: string;
  display_title?: string;
  title?: string;
  desc?: string;
  type?: string;
  cover?: RawSearchImage;
  user?: RawSearchUser;
  interactInfo?: RawSearchInteractInfo;
  interact_info?: RawSearchInteractInfo;
  xsecToken?: string;
  xsec_token?: string;
}
|
||||
|
||||
/**
 * Loosely-typed cover image object from a raw search result.
 *
 * The URL may sit in any of the direct fields or in the first entry of an
 * info list; `extractSearchImageUrl` picks the first one that is set.
 */
interface RawSearchImage {
  url?: string;
  urlPre?: string;
  urlDefault?: string;
  url_pre?: string;
  url_default?: string;
  infoList?: Array<{ url?: string }>;
  info_list?: Array<{ url?: string }>;
}
|
||||
|
||||
/**
 * Loosely-typed author object from a raw search result. Each piece of data
 * (id, nickname, avatar) is declared under every spelling that
 * `parseRawSearchItem` checks.
 */
interface RawSearchUser {
  userId?: string;
  user_id?: string;
  nickname?: string;
  nick_name?: string;
  nickName?: string;
  avatar?: string;
  avatarUrl?: string;
  avatar_url?: string;
}
|
||||
|
||||
/**
 * Loosely-typed interaction counters from a raw search result. The like
 * count is declared as a string under four alternative key spellings.
 */
interface RawSearchInteractInfo {
  likedCount?: string;
  liked_count?: string;
  likeCount?: string;
  like_count?: string;
}
|
||||
|
||||
/**
 * Loosely-typed view of `__INITIAL_STATE__` on the search page.
 *
 * The named members are the locations `parseSearchFeedsFromState` checks for
 * result arrays; the index signature lets it scan any other top-level key.
 */
interface SearchInitialState {
  searchNotes?: {
    feeds?: RawSearchFeedItem[];
  };
  searchResult?: {
    notes?: RawSearchFeedItem[];
    feeds?: RawSearchFeedItem[];
  };
  search?: {
    feeds?: RawSearchFeedItem[];
    notes?: RawSearchFeedItem[];
  };
  [key: string]: unknown;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// searchFeeds
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Search Xiaohongshu for notes matching a keyword, with optional filters.
 *
 * Loads the search results page and tries two extraction strategies in
 * order: the page's `__INITIAL_STATE__` data, and — when that yields no
 * feeds — scraping the rendered result cards.
 *
 * @param page - A Playwright Page managed by BrowserManager.
 * @param keyword - The search term.
 * @param filters - Optional sorting, type, and time range filters.
 * @returns The feeds produced by the first strategy that found any; the
 *   DOM fallback result may be an empty array.
 */
export async function searchFeeds(
  page: Page,
  keyword: string,
  filters?: SearchFilters,
): Promise<Feed[]> {
  const url = buildSearchUrl(keyword, filters);
  log.debug({ keyword, filters, url }, 'Navigating to search page');

  await page.goto(url, { waitUntil: 'domcontentloaded' });

  // Fixed grace period after DOMContentLoaded so the page can render results.
  await page.waitForTimeout(2000);

  // Strategy 1: structured data embedded in __INITIAL_STATE__.
  const initialState = (await extractInitialState(page)) as SearchInitialState | null;
  if (initialState) {
    const stateFeeds = parseSearchFeedsFromState(initialState);
    if (stateFeeds.length > 0) {
      log.info({ keyword, count: stateFeeds.length }, 'Extracted search results from __INITIAL_STATE__');
      return stateFeeds;
    }
    log.debug('__INITIAL_STATE__ found but no search feeds extracted, falling back to DOM');
  }

  // Strategy 2: scrape the rendered cards through Playwright's Node-side API.
  log.debug('Falling back to DOM scraping for search results');
  const domFeeds = await scrapeSearchResultsFromDom(page);
  log.info({ keyword, count: domFeeds.length }, 'Extracted search results from DOM');
  return domFeeds;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// URL construction
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Build the full search URL with query parameters for keyword and filters.
 *
 * `keyword` is always set. Each of `sort`, `type` and `time` is added only
 * when the filter is provided and has an entry in its lookup table; the
 * parameters are appended in that order.
 *
 * @param keyword - The search term (URL-encoded by `URLSearchParams`).
 * @param filters - Optional filters to translate into query parameters.
 * @returns The search page URL including the query string.
 */
function buildSearchUrl(keyword: string, filters?: SearchFilters): string {
  const params = new URLSearchParams();
  params.set('keyword', keyword);

  // [query parameter name, requested filter value, lookup table]
  const optionalParams: Array<[string, string | undefined, Readonly<Record<string, string>>]> = [
    ['sort', filters?.sort, SORT_PARAM],
    ['type', filters?.type, TYPE_PARAM],
    ['time', filters?.time, TIME_PARAM],
  ];

  for (const [name, choice, table] of optionalParams) {
    const mapped = choice ? table[choice] : undefined;
    if (mapped) {
      params.set(name, mapped);
    }
  }

  return `${SEARCH_BASE_URL}?${params.toString()}`;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// __INITIAL_STATE__ parsing for search results
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Parse search results from the __INITIAL_STATE__ data.
 *
 * The state comes from untyped page data, so every candidate location is
 * checked with `Array.isArray` before it is mapped. Known locations are tried
 * in priority order and the first one that yields at least one parseable
 * feed wins; if none does, every top-level object is scanned for a `feeds`,
 * `notes` or `items` array.
 *
 * @param state - The search page's initial state.
 * @returns The parsed feeds, or an empty array when nothing usable is found.
 */
function parseSearchFeedsFromState(state: SearchInitialState): Feed[] {
  // Convert one candidate value into feeds; anything that is not an array of
  // objects contributes nothing instead of throwing.
  const toFeeds = (candidate: unknown): Feed[] => {
    if (!Array.isArray(candidate)) return [];
    return candidate
      .filter((item): item is RawSearchFeedItem => item !== null && typeof item === 'object')
      .map(parseRawSearchItem)
      .filter((feed): feed is Feed => feed !== null);
  };

  // Known locations where search data may live, most preferred first.
  const knownLocations: unknown[] = [
    state.searchNotes?.feeds,
    state.searchResult?.notes,
    state.searchResult?.feeds,
    state.search?.feeds,
    state.search?.notes,
  ];
  for (const location of knownLocations) {
    const feeds = toFeeds(location);
    if (feeds.length > 0) return feeds;
  }

  // Fallback: walk top-level keys looking for an array that resembles feeds.
  const candidateKeys = ['feeds', 'notes', 'items'];
  for (const value of Object.values(state)) {
    if (!value || typeof value !== 'object' || Array.isArray(value)) continue;
    const container = value as Record<string, unknown>;
    for (const key of candidateKeys) {
      const feeds = toFeeds(container[key]);
      if (feeds.length > 0) return feeds;
    }
  }

  return [];
}
|
||||
|
||||
/**
 * Convert a single raw search result item into a structured Feed.
 *
 * Each output field is taken from the first defined candidate, checking the
 * top-level item before its nested `noteCard`. Missing text fields become
 * `''` and a missing like count becomes `'0'` before parsing.
 *
 * @param raw - One raw entry from the search state.
 * @returns The Feed, or `null` when no id can be found.
 */
function parseRawSearchItem(raw: RawSearchFeedItem): Feed | null {
  const card = raw.noteCard;

  // An item without an id cannot be addressed later, so reject it before
  // doing any further extraction work.
  const id = raw.id ?? raw.noteId ?? raw.note_id ?? card?.noteId ?? '';
  if (!id) return null;

  const xsecToken =
    raw.xsecToken ?? raw.xsec_token ?? card?.xsecToken ?? card?.xsec_token ?? '';

  const title =
    raw.displayTitle ?? raw.display_title ?? raw.title ?? raw.name ??
    card?.displayTitle ?? card?.display_title ?? card?.title ?? '';

  const description = raw.desc ?? raw.description ?? card?.desc ?? '';

  // Any type string containing "video" (case-insensitive) marks a video note.
  const rawType = raw.type ?? raw.model_type ?? card?.type ?? '';
  const type: 'normal' | 'video' =
    rawType.toLowerCase().includes('video') ? 'video' : 'normal';

  const coverUrl = extractSearchImageUrl(raw.cover ?? card?.cover);

  const author = raw.user ?? card?.user;
  const userId = author?.userId ?? author?.user_id ?? '';
  const nickname = author?.nickname ?? author?.nick_name ?? author?.nickName ?? '';
  const avatar = author?.avatar ?? author?.avatarUrl ?? author?.avatar_url ?? '';

  const interactInfo =
    raw.interactInfo ?? raw.interact_info ?? card?.interactInfo ?? card?.interact_info;
  const likeCountStr =
    interactInfo?.likedCount ?? interactInfo?.liked_count ??
    interactInfo?.likeCount ?? interactInfo?.like_count ??
    raw.likedCount ?? raw.liked_count ?? '0';
  const likeCount = parseCountString(likeCountStr);

  return {
    id,
    xsecToken,
    title,
    description,
    type,
    coverUrl,
    likeCount,
    user: { id: userId, nickname, avatar },
  };
}
|
||||
|
||||
/**
 * Extract image URL from a raw search cover object.
 *
 * The direct URL fields are checked in a fixed order and the first non-empty
 * one wins; otherwise the first entry of `infoList` (or `info_list` when
 * `infoList` is absent) is used. The chosen URL is passed through
 * `ensureHttps`.
 *
 * @param raw - The raw cover object, if any.
 * @returns The normalised URL, or `''` when no URL is available.
 */
function extractSearchImageUrl(raw: RawSearchImage | undefined): string {
  if (!raw) return '';

  const directCandidates = [raw.url, raw.urlPre, raw.urlDefault, raw.url_pre, raw.url_default];
  for (const candidate of directCandidates) {
    if (candidate) return ensureHttps(candidate);
  }

  const firstListedUrl = (raw.infoList ?? raw.info_list)?.[0]?.url;
  return firstListedUrl ? ensureHttps(firstListedUrl) : '';
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// DOM scraping fallback — uses Playwright Node-side API exclusively
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Decode a percent-encoded query value, returning the input unchanged when
 * it is not valid percent-encoding.
 */
function decodeQueryValue(value: string): string {
  try {
    return decodeURIComponent(value);
  } catch {
    return value;
  }
}

/**
 * Scrape search results using Playwright's Node-side API to avoid
 * needing DOM lib types.
 *
 * Waits up to 10 seconds for the first result card, then reads each card's
 * link, cover, title, author and like count. Cards without a recognisable
 * note id are skipped. A field that cannot be read falls back to `''`
 * (`'0'` for the like count). The `xsec_token` is percent-decoded because
 * callers encode it again when building URLs.
 *
 * @param page - The page currently showing search results.
 * @returns One Feed per successfully scraped card; `description` is always `''`.
 */
async function scrapeSearchResultsFromDom(page: Page): Promise<Feed[]> {
  const cardSelector = '.feeds-container .note-item';

  // Best effort: a timeout here simply leads to an empty result below.
  await page.waitForSelector(cardSelector, { timeout: 10_000 }).catch(() => null);

  const feeds: Feed[] = [];

  for (const card of await page.$$(cardSelector)) {
    // Read an attribute of the first match inside the card; '' when missing.
    const readAttr = async (selector: string, name: string): Promise<string> => {
      try {
        return await card.$eval(selector, (el, attr) => el.getAttribute(attr) ?? '', name);
      } catch {
        return '';
      }
    };

    // Read trimmed text of the first match inside the card.
    const readText = async (selector: string, fallback = ''): Promise<string> => {
      try {
        const text = await card.$eval(selector, (el) => el.textContent?.trim() ?? null);
        return text ?? fallback;
      } catch {
        return fallback;
      }
    };

    try {
      const href = await readAttr('a.cover', 'href');
      const id = /\/explore\/([a-f0-9]+)/.exec(href)?.[1] ?? '';
      if (!id) continue;

      const xsecToken = decodeQueryValue(/xsec_token=([^&]+)/.exec(href)?.[1] ?? '');

      const coverUrl = await readAttr('a.cover img', 'src');
      const title = await readText('.footer .title');
      const nickname = await readText('.footer .author-wrapper .name');
      const avatar = await readAttr('.footer .author-wrapper .author-head img', 'src');

      const authorHref = await readAttr('.footer .author-wrapper a', 'href');
      const userId = /\/user\/profile\/([a-f0-9]+)/.exec(authorHref)?.[1] ?? '';

      const likeCount = parseCountString(await readText('.footer .like-wrapper .count', '0'));

      const hasVideoIcon = await card.$('.play-icon').then(
        (el) => el !== null,
        () => false,
      );

      feeds.push({
        id,
        xsecToken,
        title,
        description: '',
        type: hasVideoIcon ? 'video' : 'normal',
        coverUrl,
        likeCount,
        user: { id: userId, nickname, avatar },
      });
    } catch (err: unknown) {
      // Keep going with the remaining cards, but leave a trace of the failure.
      log.debug({ err }, 'Skipping search result card that could not be scraped');
    }
  }

  return feeds;
}
|
||||
@@ -0,0 +1,203 @@
|
||||
// ---------------------------------------------------------------------------
|
||||
// CSS Selectors — centralised so that UI changes only require edits here.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * CSS selectors (and one URL pattern) for the Xiaohongshu web UI, grouped by
 * page or feature. Declared `as const`, so every value keeps its literal type.
 */
export const XHS_SELECTORS = {
  login: {
    /** QR code image on the login modal / page. */
    qrCodeImage: '.login-container .qrcode-img',
    /** Element present only when the user is logged in (sidebar channel link). */
    loggedInIndicator: '.user .link-wrapper .channel',
    /** The "login" button that opens the QR code modal (if not already shown). */
    loginButton: '.login-btn',
  },

  feed: {
    /** Container for each feed card on the explore page. */
    feedCard: '.note-item',
    /** The cover image within a feed card. */
    coverImage: '.note-item a.cover img',
    /** The title/footer within a feed card. */
    footerTitle: '.note-item .footer .title',
    /** Author name within a feed card. */
    authorName: '.note-item .footer .author-wrapper .name',
    /** Author avatar within a feed card. */
    authorAvatar: '.note-item .footer .author-wrapper .author-head img',
    /** Like count within a feed card. */
    likeCount: '.note-item .footer .like-wrapper .count',
  },

  search: {
    /** Search result container. */
    resultContainer: '#global-search-result-container',
    /** Individual search result note items. */
    noteItem: '.feeds-container .note-item',
    /** Search result cover image. */
    coverImage: '.feeds-container .note-item a.cover img',
    /** Search result title. */
    title: '.feeds-container .note-item .footer .title',
    /** Search result author name. */
    authorName: '.feeds-container .note-item .footer .author-wrapper .name',
    /** Search result author avatar. */
    authorAvatar: '.feeds-container .note-item .footer .author-wrapper .author-head img',
    /** Search result like count. */
    likeCount: '.feeds-container .note-item .footer .like-wrapper .count',
  },

  feedDetail: {
    /** The main content container for a note detail page. */
    noteContainer: '#noteContainer',
    /** The title of the note. */
    title: '#detail-title',
    /** The description / body content of the note. */
    description: '#detail-desc',
    /** Individual images in an image note. */
    images: '.note-image-list .note-image img',
    /** The single hero image (some notes use this instead of a list). */
    heroImage: '.note-hero img',
    /** Video player element. */
    video: '#videoplayer video',
    /** Video player source. */
    videoSource: '#videoplayer video source',
    /** Tag links within the note body. */
    tags: '#detail-desc a.tag',
    /** Like count. */
    likeCount: '.engage-bar .like-wrapper .count',
    /** Collect (favorite) count. */
    collectCount: '.engage-bar .collect-wrapper .count',
    /** Comment count. */
    commentCount: '.engage-bar .chat-wrapper .count',
    /** Share count. */
    shareCount: '.engage-bar .share-wrapper .count',
    /** Publish / create time text. */
    createTime: '.note-scroller .bottom-container .date',
    /** IP location. */
    ipLocation: '.note-scroller .bottom-container .ip-location',
    /** Author nickname on the detail page. */
    authorName: '.author-container .info .name',
    /** Author avatar on the detail page. */
    authorAvatar: '.author-container .info .avatar img',
    /** Author user ID link. */
    authorLink: '.author-container .info a',
    /** Comment list container. */
    commentListContainer: '.comments-container .list-container',
    /** Individual top-level comment items. */
    commentItem: '.comments-container .list-container .list-item',
    /** Parent comment content text. */
    commentContent: '.content',
    /** Comment author name. */
    commentAuthor: '.author .name',
    /** Comment author avatar. */
    commentAvatar: '.author .avatar img',
    /** Comment like count. */
    commentLikeCount: '.like .count',
    /** Comment publish time. */
    commentTime: '.date',
    /** Comment IP location. */
    commentIpLocation: '.ip-location',
    /** Sub-comment (reply) items. */
    subCommentItem: '.sub-comment-list .sub-comment-item',
    /** "Show more comments" button. */
    showMoreComments: '.comments-container .show-more',
    /** "Load more replies" button within a comment thread. */
    loadMoreReplies: '.sub-comment-list .show-more',
  },

  userProfile: {
    /** Profile header container. */
    headerContainer: '.user-info',
    /** User nickname. */
    nickname: '.user-info .user-name',
    /** User avatar image. */
    avatar: '.user-info .user-image img',
    /** User bio / description text. */
    description: '.user-info .user-desc',
    /** User gender icon or text. */
    gender: '.user-info .gender-icon',
    /** IP location. */
    ipLocation: '.user-info .user-ip',
    /** Follower / following / interaction count elements. */
    followCount: '.user-info .data-area .data-item',
    /** Note count (displayed somewhere on the profile page). */
    noteCountTab: '.reds-tab-item',
    /** Individual feed items on the user profile. */
    feedItem: '.feeds-container .note-item',
  },

  // -- Phase 4: Publish -----------------------------------------------------

  publish: {
    /** The file input element for uploading images on the creator publish page. */
    imageFileInput: 'input[type="file"]',
    /** Title input field on the publish form. */
    titleInput: '#note-title',
    /** Content / body editor area on the publish form (contenteditable). */
    contentEditor: '#note-content',
    /** The tag / topic button that opens the topic input. */
    tagButton: '#topicBtn',
    /** Tag / topic input field for typing hashtags. */
    tagInput: '#topicBtn input',
    /** Topic / hashtag suggestion dropdown item. */
    tagSuggestionItem: '.publish-topic-item, .topic-item',
    /** "Publish" / submit button. */
    publishButton: '.publishBtn',
    /** Schedule / timing selector button. */
    scheduleButton: '.timing-btn, button:has-text("定时")',
    /** Schedule date/time input field. */
    scheduleInput: '.timing-input input, .schedule-input input',
    /** Original content declaration checkbox. */
    originalCheckbox: '.original-checkbox input, input[type="checkbox"][name="original"]',
    /** Visibility / permission setting button. */
    visibilityButton: '.permission-btn, button:has-text("可见")',
    /** Visibility option for public. */
    visibilityPublic: '.permission-option:has-text("公开"), .visibility-option:has-text("公开")',
    /** Visibility option for private. */
    visibilityPrivate: '.permission-option:has-text("私密"), .visibility-option:has-text("私密")',
    /** Visibility option for friends only. */
    visibilityFriends: '.permission-option:has-text("好友"), .visibility-option:has-text("好友")',
    /** Upload complete indicator (images uploaded and thumbnails visible). */
    uploadedImageItem: '.upload-item img, .img-item img, .image-item img',
    /** Video upload complete indicator (video thumbnail visible). */
    uploadedVideoItem: '.upload-video video, .video-item video, .video-container video',
    /** Success indicator shown after publish completes. */
    publishSuccess: '.success-panel, .publish-success, .note-success',
    /** Pattern for the page URL after a successful publish (used as a fallback check). */
    publishSuccessUrlPattern: /\/publish\/success/,
  },

  // -- Phase 4: Comment / Reply ---------------------------------------------

  comment: {
    /** The comment input field / textarea on the feed detail page. */
    commentInput: '#content-textarea',
    /** Alternative comment input (contenteditable div). */
    commentInputAlt: '[contenteditable][data-placeholder]',
    /** Comment submit / send button. */
    commentSubmitButton: '.comment-submit, button.submit, .btn-send',
    /** Parent comment element (used to find specific comment by ID). */
    commentItem: '.comment-item, .note-comment-item, [id^="comment-"]',
    /** Reply button on an individual comment. */
    commentReplyButton: '.reply-btn, .comment-reply',
    /** Reply input that appears after clicking reply. */
    replyInput: '.reply-input textarea, .reply-content [contenteditable], .reply-area textarea',
  },

  // -- Phase 4: Interaction (Like / Favorite) --------------------------------

  interaction: {
    /** Like button on the feed detail page. */
    likeButton: '.engage-bar .like-wrapper, span.like-wrapper',
    /** Like button in active/liked state. */
    likeButtonActive: '.engage-bar .like-wrapper.active, span.like-wrapper.active',
    /** Like count element next to the like button. */
    likeCount: '.engage-bar .like-wrapper .count',
    /** Favorite / collect button on the feed detail page. */
    favoriteButton: '.engage-bar .collect-wrapper, span.collect-wrapper',
    /** Favorite button in active/favorited state. */
    favoriteButtonActive: '.engage-bar .collect-wrapper.active, span.collect-wrapper.active',
    /** Favorite count element next to the favorite button. */
    favoriteCount: '.engage-bar .collect-wrapper .count',
    /** Container for the interaction bar at the bottom of a feed detail. */
    interactionBar: '.interact-container, .engage-bar',
  },
} as const;
|
||||
@@ -0,0 +1,98 @@
|
||||
// ---------------------------------------------------------------------------
|
||||
// Xiaohongshu domain types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// -- Login -----------------------------------------------------------------
|
||||
|
||||
/** Result of checking whether the current session is logged in. */
export interface LoginStatus {
  /** `true` when the session is logged in. */
  loggedIn: boolean;
  /** Account name, when it is known. */
  username?: string;
}
|
||||
|
||||
/** Result of requesting a login QR code. */
export interface QRCodeResult {
  /** Base64 data URI of the QR code image. */
  qrcodeData: string;
  /** Whether the user was already logged in (no QR code needed). */
  alreadyLoggedIn: boolean;
  /** Human-readable timeout hint (e.g. "4m"). */
  timeout: string;
}
|
||||
|
||||
// -- Feed -----------------------------------------------------------------
|
||||
|
||||
/** Author summary attached to a feed; all three fields are required strings. */
export interface FeedUser {
  /** User ID. */
  id: string;
  /** Display name. */
  nickname: string;
  /** Avatar image URL. */
  avatar: string;
}
|
||||
|
||||
/** A note as it appears in a feed or search result listing. */
export interface Feed {
  /** Note ID. */
  id: string;
  /** Security token that accompanies the note ID. */
  xsecToken: string;
  title: string;
  description: string;
  /** `'video'` for video notes, `'normal'` otherwise. */
  type: 'normal' | 'video';
  /** Cover image URL. */
  coverUrl: string;
  /** Like count as a number. */
  likeCount: number;
  /** The note's author. */
  user: FeedUser;
}
|
||||
|
||||
// -- Feed Detail ----------------------------------------------------------
|
||||
|
||||
/** Full detail of a single note, including its media, counters and comments. */
export interface FeedDetail {
  /** Note ID. */
  id: string;
  /** Security token that accompanies the note ID. */
  xsecToken: string;
  title: string;
  description: string;
  /** `'video'` for video notes, `'normal'` otherwise. */
  type: 'normal' | 'video';
  /** Image URLs of the note. */
  images: string[];
  /** Video URL, when there is one. */
  videoUrl?: string;
  tags: string[];

  // Engagement counters
  likeCount: number;
  collectCount: number;
  commentCount: number;
  shareCount: number;

  // Timestamps and location are kept as strings.
  createTime: string;
  lastUpdateTime: string;
  ipLocation: string;

  /** The note's author. */
  user: FeedUser;
  /** Top-level comments; replies are nested inside each comment. */
  comments: Comment[];
}
|
||||
|
||||
// -- Comment --------------------------------------------------------------
|
||||
|
||||
/** A comment on a note; replies nest recursively through `subComments`. */
export interface Comment {
  /** Comment ID. */
  id: string;

  // Author
  userId: string;
  nickname: string;
  avatar: string;

  /** Comment text. */
  content: string;
  likeCount: number;
  /** Publish time, kept as a string. */
  createTime: string;
  ipLocation: string;
  /** Replies to this comment. */
  subComments: Comment[];
}
|
||||
|
||||
// -- User Profile ---------------------------------------------------------
|
||||
|
||||
/** A user's profile: basic info, numeric counters and a list of their notes. */
export interface UserProfile {
  /** User ID. */
  id: string;
  nickname: string;
  avatar: string;
  /** Bio text. */
  description: string;
  gender: string;
  ipLocation: string;

  // Counters
  follows: number;
  fans: number;
  interaction: number;
  feedCount: number;

  /** Notes listed on the profile. */
  feeds: Feed[];
}
|
||||
|
||||
// -- Search Filters -------------------------------------------------------
|
||||
|
||||
/** Optional search filters; any omitted member leaves that filter unset. */
export interface SearchFilters {
  /** Sort order. */
  sort?: 'general' | 'time_descending' | 'popularity_descending';
  /** Content type filter. */
  type?: 'all' | 'note' | 'video';
  /** Time range filter. */
  time?: 'all' | 'day' | 'week' | 'half_year';
}
|
||||
@@ -0,0 +1,442 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import { XHS_SELECTORS } from './selectors.js';
|
||||
import { extractInitialState, parseCountString, ensureHttps } from './feeds.js';
|
||||
import type { UserProfile, Feed } from './types.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** URL prefix of a user profile page; the user ID is appended as a path segment. */
const USER_PROFILE_BASE_URL = 'https://www.xiaohongshu.com/user/profile';

/** Shorthand for the user-profile selector group. */
const SEL = XHS_SELECTORS.userProfile;

/** Child logger whose bindings tag entries with `module: 'xhs-user-profile'`. */
const log = logger.child({ module: 'xhs-user-profile' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// __INITIAL_STATE__ raw types for user profile
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Loosely-typed view of `__INITIAL_STATE__` on a user profile page.
 *
 * Profile data is declared under two alternative top-level keys (`user` and
 * `userProfile`); the index signature admits any other key as `unknown`.
 */
interface RawProfileState {
  user?: {
    userPageData?: RawUserPageData;
    userInfo?: RawUserInfo;
  };
  userProfile?: {
    userInfo?: RawUserInfo;
    notes?: RawProfileNote[];
  };
  [key: string]: unknown;
}
|
||||
|
||||
/**
 * Loosely-typed `userPageData` object from the profile state. All members
 * are optional; the note count is declared under two spellings and may be a
 * number or a string.
 */
interface RawUserPageData {
  basicInfo?: RawUserInfo;
  interactions?: RawInteractions;
  notes?: RawProfileNote[];
  noteCount?: number | string;
  note_count?: number | string;
}
|
||||
|
||||
/**
 * Loosely-typed user info object from the profile state.
 *
 * Every field is optional and most are declared under several spellings.
 * Counters and `gender` may be a number or a string.
 */
interface RawUserInfo {
  // Identity
  userId?: string;
  user_id?: string;
  nickname?: string;
  nick_name?: string;
  nickName?: string;

  // Avatar candidates
  avatar?: string;
  avatarUrl?: string;
  avatar_url?: string;
  images?: string;

  // Bio
  desc?: string;
  description?: string;

  gender?: number | string;
  ipLocation?: string;
  ip_location?: string;
  fstatus?: string;

  // Counters
  follows?: number | string;
  fans?: number | string;
  interaction?: number | string;
  noteCount?: number | string;
  note_count?: number | string;
}
|
||||
|
||||
/**
 * Loosely-typed interaction counters from the profile state; each may be a
 * string or a number.
 */
interface RawInteractions {
  follows?: string | number;
  fans?: string | number;
  interaction?: string | number;
}
|
||||
|
||||
/** Like counters of a raw note, under every spelling the parser probes. */
interface RawProfileNoteLikeCounts {
  likedCount?: string;
  liked_count?: string;
  likeCount?: string;
  like_count?: string;
}

/** Cover image descriptor of a raw note. */
interface RawProfileNoteCover {
  url?: string;
  urlPre?: string;
  url_pre?: string;
  urlDefault?: string;
  url_default?: string;
  infoList?: Array<{ url?: string }>;
  info_list?: Array<{ url?: string }>;
}

/** Author record embedded in a raw note. */
interface RawProfileNoteAuthor {
  userId?: string;
  user_id?: string;
  nickname?: string;
  nick_name?: string;
  avatar?: string;
}

/**
 * Raw note entry as listed on a profile page in `__INITIAL_STATE__`.
 * All members are optional; camelCase and snake_case spellings coexist.
 */
interface RawProfileNote {
  // -- Note identifier --
  id?: string;
  noteId?: string;
  note_id?: string;

  // -- Per-note security token --
  xsecToken?: string;
  xsec_token?: string;

  // -- Title / body --
  displayTitle?: string;
  display_title?: string;
  title?: string;
  desc?: string;

  /** A value containing "video" (case-insensitive) marks the note as a video. */
  type?: string;

  cover?: RawProfileNoteCover;
  user?: RawProfileNoteAuthor;

  // -- Like counters, nested or directly on the note --
  interactInfo?: RawProfileNoteLikeCounts;
  interact_info?: RawProfileNoteLikeCounts;
  likedCount?: string;
  liked_count?: string;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// getUserProfile
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Navigate to a Xiaohongshu user profile page and extract the profile:
 * basic info, follower/following counts, and the notes listed on the page.
 *
 * Extraction is attempted from `__INITIAL_STATE__` first; when that yields no
 * user record the rendered DOM is scraped instead.
 *
 * @param page - A Playwright Page managed by BrowserManager.
 * @param userId - The user ID; percent-encoded before being placed in the URL path.
 * @param xsecToken - Security token required to access the profile page.
 * @returns A UserProfile object with the user's data.
 */
export async function getUserProfile(
  page: Page,
  userId: string,
  xsecToken: string,
): Promise<UserProfile> {
  // Both values are caller-supplied. Encode the path segment as well as the
  // query value so characters such as "/", "?" or "#" in `userId` cannot
  // redirect the navigation to a different path or inject query parameters.
  const url =
    `${USER_PROFILE_BASE_URL}/${encodeURIComponent(userId)}` +
    `?xsec_token=${encodeURIComponent(xsecToken)}&xsec_source=pc_note`;
  log.debug({ userId, url }, 'Navigating to user profile page');

  await page.goto(url, { waitUntil: 'domcontentloaded' });

  // Wait for the profile header, but treat a timeout as non-fatal: the state
  // object may still be available even if the selector never matches.
  await page
    .waitForSelector(SEL.headerContainer, { timeout: 15_000 })
    .catch(() => {
      log.warn({ userId }, 'User profile header not found within timeout, proceeding');
    });

  // Allow render to settle.
  await page.waitForTimeout(1500);

  // -----------------------------------------------------------------------
  // Strategy 1: Extract from __INITIAL_STATE__
  // -----------------------------------------------------------------------
  const initialState = await extractInitialState(page) as RawProfileState | null;

  if (initialState) {
    const profile = parseProfileFromState(initialState, userId, xsecToken);
    if (profile) {
      log.info({ userId, feedCount: profile.feeds.length }, 'Extracted user profile from __INITIAL_STATE__');
      return profile;
    }
    log.debug('__INITIAL_STATE__ found but no profile data extracted, falling back to DOM');
  }

  // -----------------------------------------------------------------------
  // Strategy 2: Fall back to DOM scraping
  // -----------------------------------------------------------------------
  log.debug({ userId }, 'Falling back to DOM scraping for user profile');
  const profile = await scrapeProfileFromDom(page, userId, xsecToken);
  log.info({ userId, feedCount: profile.feeds.length }, 'Extracted user profile from DOM');
  return profile;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// __INITIAL_STATE__ parsing
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Build a UserProfile from `__INITIAL_STATE__`.
 *
 * The user record is looked up in three places, in order:
 * `user.userPageData.basicInfo`, `user.userInfo`, `userProfile.userInfo`.
 *
 * @param state - Raw state object read from the page.
 * @param userId - Requested user ID; used when the record carries no ID and
 *   as the owner ID for notes that carry no author.
 * @param _xsecToken - Accepted for signature parity with the DOM scraper; not read here.
 * @returns The parsed profile, or `null` when no user record is found.
 */
function parseProfileFromState(
  state: RawProfileState,
  userId: string,
  _xsecToken: string,
): UserProfile | null {
  const pageData = state.user?.userPageData;
  const info = pageData?.basicInfo ?? state.user?.userInfo ?? state.userProfile?.userInfo;
  if (!info) return null;

  // Gender codes: 1 = male, 2 = female, anything else is left blank.
  const genderCode = info.gender;
  const gender =
    genderCode === 1 || genderCode === '1'
      ? 'male'
      : genderCode === 2 || genderCode === '2'
        ? 'female'
        : '';

  // Counters: the dedicated `interactions` object wins over the user record.
  const counters = pageData?.interactions;
  const follows = toNumber(counters?.follows ?? info.follows ?? 0);
  const fans = toNumber(counters?.fans ?? info.fans ?? 0);
  const interaction = toNumber(counters?.interaction ?? info.interaction ?? 0);
  const feedCount = toNumber(
    pageData?.noteCount ?? pageData?.note_count ?? info.noteCount ?? info.note_count ?? 0,
  );

  // Notes on the profile page; entries without an ID are dropped.
  const feeds: Feed[] = [];
  for (const rawNote of pageData?.notes ?? state.userProfile?.notes ?? []) {
    const feed = parseProfileNote(rawNote, userId);
    if (feed !== null) feeds.push(feed);
  }

  const rawAvatar = info.avatar ?? info.avatarUrl ?? info.avatar_url ?? info.images ?? '';

  return {
    id: info.userId ?? info.user_id ?? userId,
    nickname: info.nickname ?? info.nick_name ?? info.nickName ?? '',
    avatar: rawAvatar ? ensureHttps(rawAvatar) : '',
    description: info.desc ?? info.description ?? '',
    gender,
    ipLocation: info.ipLocation ?? info.ip_location ?? '',
    follows,
    fans,
    interaction,
    feedCount,
    feeds,
  };
}
|
||||
|
||||
/**
 * Return the first candidate that is neither null/undefined nor the empty
 * string, or `''` when there is none.
 */
function firstNonEmptyString(...candidates: Array<string | null | undefined>): string {
  for (const candidate of candidates) {
    if (candidate != null && candidate !== '') return candidate;
  }
  return '';
}

/**
 * Parse a note from the user profile state into a Feed object.
 *
 * Every field is probed under all spellings declared on RawProfileNote. An
 * empty string is treated the same as a missing value, so an empty `id` or
 * `cover.url` no longer hides a usable alternative spelling.
 *
 * @param raw - Raw note entry from `__INITIAL_STATE__`.
 * @param ownerUserId - Profile owner's ID, used when the note carries no author ID.
 * @returns The parsed feed, or `null` when the note has no identifier.
 */
function parseProfileNote(
  raw: RawProfileNote,
  ownerUserId: string,
): Feed | null {
  const id = firstNonEmptyString(raw.id, raw.noteId, raw.note_id);
  if (!id) return null;

  const noteXsecToken = firstNonEmptyString(raw.xsecToken, raw.xsec_token);
  const title = firstNonEmptyString(raw.displayTitle, raw.display_title, raw.title);
  const description = raw.desc ?? '';
  const type: 'normal' | 'video' =
    (raw.type ?? '').toLowerCase().includes('video') ? 'video' : 'normal';

  // Cover image: direct URL fields first, then the first entry of the info list.
  let coverUrl = '';
  if (raw.cover) {
    const infoList = raw.cover.infoList ?? raw.cover.info_list;
    coverUrl = firstNonEmptyString(
      raw.cover.url,
      raw.cover.urlPre,
      raw.cover.url_pre,
      raw.cover.urlDefault,
      raw.cover.url_default,
      infoList?.[0]?.url,
    );
    if (coverUrl) coverUrl = ensureHttps(coverUrl);
  }

  // Like count: nested interact info first, then fields directly on the note.
  const interact = raw.interactInfo ?? raw.interact_info;
  const likeCountStr =
    firstNonEmptyString(
      interact?.likedCount,
      interact?.liked_count,
      interact?.likeCount,
      interact?.like_count,
      raw.likedCount,
      raw.liked_count,
    ) || '0';
  const likeCount = parseCountString(likeCountStr);

  // Author: falls back to the profile owner when the note has no author ID.
  const rawUser = raw.user;
  const authorAvatar = rawUser?.avatar ?? '';
  const user = {
    id: firstNonEmptyString(rawUser?.userId, rawUser?.user_id) || ownerUserId,
    nickname: firstNonEmptyString(rawUser?.nickname, rawUser?.nick_name),
    // Normalised the same way as the cover and the profile avatar.
    avatar: authorAvatar ? ensureHttps(authorAvatar) : '',
  };

  return {
    id,
    xsecToken: noteXsecToken,
    title,
    description,
    type,
    coverUrl,
    likeCount,
    user,
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// DOM scraping fallback — uses Playwright Node-side API exclusively
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Fallback extractor: read the profile from the rendered DOM via Playwright's
 * `$eval` / `$$eval` helpers.
 *
 * Every individual lookup is best-effort: a missing element yields an empty
 * string or zero rather than an error. Only the initial feed-card query
 * (`page.$$`) is allowed to reject.
 *
 * @param page - Page already navigated to the profile.
 * @param userId - Requested user ID; used as the profile ID and as the author of every feed.
 * @param xsecToken - Profile token, used for feeds whose link carries no token of its own.
 * @returns The scraped profile.
 */
async function scrapeProfileFromDom(
  page: Page,
  userId: string,
  xsecToken: string,
): Promise<UserProfile> {
  /** Trimmed text of the first element matching `selector`, or '' when absent. */
  const readText = (selector: string): Promise<string> =>
    page
      .$eval(selector, (el) => el.textContent?.trim() ?? '')
      .catch(() => '');

  /** Percent-decode a query value; malformed input is returned unchanged. */
  const decodeQueryValue = (value: string): string => {
    try {
      return decodeURIComponent(value);
    } catch {
      return value;
    }
  };

  // Nickname
  const nickname = await readText(SEL.nickname);

  // Avatar — normalised like the avatar taken from __INITIAL_STATE__.
  const avatarSrc = await page
    .$eval(SEL.avatar, (img) => img.getAttribute('src') ?? '')
    .catch(() => '');
  const avatar = avatarSrc ? ensureHttps(avatarSrc) : '';

  // Description / bio
  const description = await readText(SEL.description);

  // Gender — inferred from the icon's class list. The attribute is read
  // instead of `className`, which is not a string on SVG elements.
  const gender = await page
    .$eval(SEL.gender, (el) => {
      const cls = (el.getAttribute('class') ?? '').toLowerCase();
      // "female" contains "male", so it has to be tested first.
      if (cls.includes('female')) return 'female';
      if (cls.includes('male')) return 'male';
      return '';
    })
    .catch(() => '');

  // IP location
  const ipLocation = await readText(SEL.ipLocation);

  // Follower / following / interaction counts, read positionally from the
  // matched items: [0] follows, [1] fans, [2] interaction.
  const dataCounts = await page
    .$$eval(SEL.followCount, (items) =>
      items.map((item) => item.querySelector('.count')?.textContent?.trim() ?? '0'),
    )
    .catch(() => [] as string[]);

  const follows = parseCountString(dataCounts[0] ?? '0');
  const fans = parseCountString(dataCounts[1] ?? '0');
  const interaction = parseCountString(dataCounts[2] ?? '0');

  // Note count — first run of digits in the tab whose label contains "笔记"
  // (written as escapes below); 0 when no such tab or number exists.
  const feedCount = await page
    .$$eval(SEL.noteCountTab, (tabs) => {
      for (const tab of tabs) {
        const text = tab.textContent ?? '';
        if (text.includes('\u7B14\u8BB0')) {
          const match = text.match(/\d+/);
          return match ? parseInt(match[0], 10) : 0;
        }
      }
      return 0;
    })
    .catch(() => 0);

  // Feed cards on the profile page.
  const feedElements = await page.$$(SEL.feedItem);
  const feeds: Feed[] = [];

  for (const card of feedElements) {
    try {
      const href = await card
        .$eval('a.cover', (el) => el.getAttribute('href') ?? '')
        .catch(() => '');

      const id = href.match(/\/explore\/([a-f0-9]+)/)?.[1] ?? '';
      if (!id) continue;

      // The href holds the token percent-encoded. Decode it so that callers
      // who later pass it through encodeURIComponent do not double-encode it.
      const noteXsecToken = decodeQueryValue(href.match(/xsec_token=([^&]+)/)?.[1] ?? '');

      const coverSrc = await card
        .$eval('a.cover img', (el) => el.getAttribute('src') ?? el.getAttribute('data-src') ?? '')
        .catch(() => '');

      const feedTitle = await card
        .$eval('.footer .title', (el) => el.textContent?.trim() ?? '')
        .catch(() => '');

      const likeText = await card
        .$eval('.footer .like-wrapper .count', (el) => el.textContent?.trim() ?? '0')
        .catch(() => '0');

      const hasVideoIcon = await card
        .$('.play-icon')
        .then((el) => el !== null)
        .catch(() => false);

      feeds.push({
        id,
        xsecToken: noteXsecToken || xsecToken,
        title: feedTitle,
        description: '',
        type: hasVideoIcon ? 'video' : 'normal',
        coverUrl: coverSrc ? ensureHttps(coverSrc) : '',
        likeCount: parseCountString(likeText),
        user: { id: userId, nickname: '', avatar: '' },
      });
    } catch {
      // A card that cannot be read is skipped; the rest are still collected.
      continue;
    }
  }

  return {
    id: userId,
    nickname,
    avatar,
    description,
    gender,
    ipLocation,
    follows,
    fans,
    interaction,
    feedCount,
    feeds,
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Normalise a counter to a number.
 *
 * @param val - A number, returned unchanged, or a display string, which is
 *   delegated to parseCountString.
 * @returns The numeric value.
 */
function toNumber(val: string | number): number {
  return typeof val === 'number' ? val : parseCountString(val);
}
|
||||
@@ -0,0 +1,342 @@
|
||||
import http from 'node:http';
|
||||
|
||||
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
||||
import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js';
|
||||
import express from 'express';
|
||||
|
||||
import { config } from '../config/index.js';
|
||||
import { BrowserManager, browserManager } from '../browser/manager.js';
|
||||
import { logger } from '../utils/logger.js';
|
||||
import {
|
||||
dnsRebindingGuard,
|
||||
shutdownGuard,
|
||||
errorHandler,
|
||||
bearerAuth,
|
||||
initBearerToken,
|
||||
} from './middleware.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
// Package version -- reported by the /health endpoint and in the MCP server
// info. Kept as a hard-coded literal so no JSON import is needed (which would
// require `resolveJsonModule` + ESM assertion gymnastics).
// ---------------------------------------------------------------------------

const PACKAGE_VERSION = '0.1.0';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// PlatformPlugin interface
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Contract implemented by every platform adapter that plugs into AppServer.
 *
 * Only `name` and `registerTools` are mandatory. The remaining hooks are
 * optional and are invoked by AppServer when present: `registerRoutes` during
 * registration, `init` during start-up, `shutdown` during close, and
 * `healthCheck` on every /health request.
 */
export interface PlatformPlugin {
  /** Name shown in log entries and used as the key in the /health plugin map. */
  name: string;

  /** Add the plugin's MCP tools to the shared McpServer instance. */
  registerTools(server: McpServer, browser: BrowserManager): void;

  /** Optional: add REST routes to the router AppServer mounts for the plugin. */
  registerRoutes?(router: express.Router, browser: BrowserManager): void;

  /** Optional: asynchronous start-up work, awaited before the server listens. */
  init?(): Promise<void>;

  /** Optional: teardown, awaited during graceful shutdown. */
  shutdown?(): Promise<void>;

  /** Optional: report plugin health; an unhealthy result makes /health return 503. */
  healthCheck?(): Promise<{ healthy: boolean; message?: string }>;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// AppServer
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * HTTP front end of the service: an Express app that exposes the MCP server
 * over SSE (`GET /sse`, `POST /messages`), a `/health` endpoint, and the
 * plugins' REST routes under `/api`, protected by Bearer authentication.
 */
export class AppServer {
  // -- Public surface -------------------------------------------------------

  /** The underlying Express application -- use for plugin route mounting. */
  readonly app: express.Express;

  /** The MCP server instance -- use for plugin tool registration. */
  readonly mcpServer: McpServer;

  // -- Internal state -------------------------------------------------------

  private httpServer: http.Server | null = null;
  private shuttingDown = false;
  private readonly plugins: PlatformPlugin[] = [];

  /**
   * SSE transports keyed by session ID so that POST /messages can route
   * incoming JSON-RPC messages to the correct transport instance.
   */
  private readonly transports = new Map<string, SSEServerTransport>();

  // -- Constructor ----------------------------------------------------------

  constructor() {
    // 1. Express app + body parsing
    this.app = express();
    this.app.use(express.json());

    // 2. Security & availability middleware
    this.app.use(dnsRebindingGuard);
    this.app.use(shutdownGuard(() => this.shuttingDown));

    // 3. MCP server
    this.mcpServer = new McpServer(
      { name: 'social-mcp', version: PACKAGE_VERSION },
    );

    // 4. SSE transport endpoints
    this.setupSseEndpoints();

    // 5. Health endpoint
    this.setupHealthEndpoint();

    // 6. Bearer token auth for /api/* routes
    initBearerToken();
    this.app.use('/api', bearerAuth);

    // 7. Error handler (must be registered last -- re-registered after plugins)
    this.app.use(errorHandler);
  }

  // -- Plugin registration --------------------------------------------------

  /**
   * Register a platform plugin. Call this **before** `start()` so that all
   * tools and routes are wired up before the server begins accepting
   * connections.
   *
   * @param plugin - The plugin to register.
   */
  registerPlugin(plugin: PlatformPlugin): void {
    logger.info({ plugin: plugin.name }, 'Registering platform plugin');

    plugin.registerTools(this.mcpServer, browserManager);

    if (plugin.registerRoutes) {
      const router = express.Router();
      plugin.registerRoutes(router, browserManager);
      // NOTE(review): the mount point is fixed to /api/xhs (xiaohongshu), so a
      // second plugin with routes would share this prefix — revisit when
      // another platform is added.
      this.app.use(`/api/xhs`, router);
    }

    this.plugins.push(plugin);
  }

  // -- Lifecycle ------------------------------------------------------------

  /**
   * Initialise all plugins and start listening for HTTP connections on
   * `config.host:config.port`.
   *
   * @returns A promise that resolves once the server is listening and rejects
   *   if the HTTP server emits an error.
   */
  async start(): Promise<void> {
    // Initialise plugins (sequentially so order is deterministic).
    for (const plugin of this.plugins) {
      if (plugin.init) {
        logger.info({ plugin: plugin.name }, 'Initialising plugin');
        await plugin.init();
      }
    }

    // Re-register the error handler so it sits after any plugin routes.
    this.app.use(errorHandler);

    return new Promise<void>((resolve, reject) => {
      this.httpServer = this.app
        .listen(config.port, config.host, () => {
          logger.info(
            { host: config.host, port: config.port },
            'AppServer listening',
          );
          resolve();
        })
        .on('error', (err: Error) => {
          reject(err);
        });
    });
  }

  /**
   * Initiate graceful shutdown:
   *   1. Set the shutting-down flag (new requests get 503).
   *   2. Shut down every plugin.
   *   3. Close all SSE transports and the MCP server.
   *   4. Close the HTTP server.
   *
   * Failures in steps 2 and 3 are logged and do not abort the sequence.
   * A second call returns immediately without waiting for the first.
   */
  async close(): Promise<void> {
    if (this.shuttingDown) return;
    this.shuttingDown = true;

    logger.info('AppServer shutting down');

    // Shut down plugins
    for (const plugin of this.plugins) {
      if (plugin.shutdown) {
        try {
          await plugin.shutdown();
        } catch (err: unknown) {
          logger.warn({ err, plugin: plugin.name }, 'Error shutting down plugin');
        }
      }
    }

    // Close all SSE transports
    for (const [sessionId, transport] of this.transports) {
      try {
        await transport.close();
      } catch (err: unknown) {
        logger.warn({ err, sessionId }, 'Error closing SSE transport');
      }
    }
    this.transports.clear();

    // Close the MCP server
    try {
      await this.mcpServer.close();
    } catch (err: unknown) {
      logger.warn({ err }, 'Error closing MCP server');
    }

    // Close the HTTP server
    const server = this.httpServer;
    if (server) {
      await new Promise<void>((resolve) => {
        server.close(() => {
          resolve();
        });
      });
      this.httpServer = null;
    }

    logger.info('AppServer shut down complete');
  }

  // -- Private: SSE endpoints -----------------------------------------------

  /** Register `GET /sse` (open a session) and `POST /messages` (send to a session). */
  private setupSseEndpoints(): void {
    // GET /sse -- establish a new SSE connection
    this.app.get('/sse', (req, res) => {
      logger.debug({ ip: req.ip }, 'New SSE connection request');

      const transport = new SSEServerTransport('/messages', res);
      const sessionId = transport.sessionId;

      this.transports.set(sessionId, transport);

      logger.info({ sessionId }, 'SSE transport created');

      // Clean up when the client disconnects.
      res.on('close', () => {
        logger.info({ sessionId }, 'SSE client disconnected');
        this.transports.delete(sessionId);
      });

      // Connect the transport to the MCP server.  This starts the SSE
      // stream and sends the initial endpoint event to the client.
      // NOTE(review): one McpServer instance is shared by every SSE session —
      // confirm the SDK version in use supports several connected transports.
      void this.mcpServer.connect(transport).catch((err: unknown) => {
        logger.error({ err, sessionId }, 'Failed to connect SSE transport to MCP server');
        this.transports.delete(sessionId);
        // Do not leave the client waiting on a stream that never started.
        if (!res.headersSent) {
          res.status(500).end();
        }
      });
    });

    // POST /messages -- receive JSON-RPC messages for an existing session
    this.app.post('/messages', (req, res) => {
      // A repeated or nested query parameter parses to an array/object, so
      // check the runtime type rather than asserting it.
      const rawSessionId: unknown = req.query['sessionId'];
      const sessionId = typeof rawSessionId === 'string' ? rawSessionId : undefined;

      if (!sessionId) {
        res.status(400).json({ error: 'Missing sessionId query parameter' });
        return;
      }

      const transport = this.transports.get(sessionId);

      if (!transport) {
        res.status(404).json({ error: 'Unknown or expired session' });
        return;
      }

      // express.json() has already consumed the request stream, so the
      // transport cannot read the body itself; hand it the parsed body.
      void transport.handlePostMessage(req, res, req.body).catch((err: unknown) => {
        logger.error({ err, sessionId }, 'Error handling POST /messages');
        if (!res.headersSent) {
          res.status(500).json({ error: 'Internal server error' });
        }
      });
    });
  }

  // -- Private: Health endpoint ---------------------------------------------

  /** Register `GET /health`: 200 when healthy, 503 when not, 500 on an unexpected failure. */
  private setupHealthEndpoint(): void {
    this.app.get('/health', (_req, res) => {
      void this.buildHealthResponse()
        .then((body) => {
          const status = body.healthy ? 200 : 503;
          res.status(status).json(body);
        })
        .catch((err: unknown) => {
          logger.error({ err }, 'Health check failed unexpectedly');
          res.status(500).json({ healthy: false, error: 'Health check error' });
        });
    });
  }

  /**
   * Collect the health payload: memory usage in MiB, active SSE session
   * count, and per-plugin health. A plugin without a `healthCheck` hook is
   * reported healthy; a hook that throws is reported unhealthy with the
   * error message. The service is healthy only when every plugin is healthy
   * and shutdown has not started.
   */
  private async buildHealthResponse(): Promise<Record<string, unknown>> {
    // Memory usage
    const mem = process.memoryUsage();
    const memoryMb = {
      rss: Math.round(mem.rss / 1024 / 1024),
      heapUsed: Math.round(mem.heapUsed / 1024 / 1024),
      heapTotal: Math.round(mem.heapTotal / 1024 / 1024),
      external: Math.round(mem.external / 1024 / 1024),
    };

    // Active SSE sessions
    const activeSessions = this.transports.size;

    // Plugin health checks
    const pluginHealth: Record<string, { healthy: boolean; message?: string }> = {};
    let allPluginsHealthy = true;

    for (const plugin of this.plugins) {
      if (plugin.healthCheck) {
        try {
          const result = await plugin.healthCheck();
          pluginHealth[plugin.name] = result;
          if (!result.healthy) {
            allPluginsHealthy = false;
          }
        } catch (err: unknown) {
          const message = err instanceof Error ? err.message : String(err);
          pluginHealth[plugin.name] = { healthy: false, message };
          allPluginsHealthy = false;
        }
      } else {
        pluginHealth[plugin.name] = { healthy: true };
      }
    }

    const healthy = allPluginsHealthy && !this.shuttingDown;

    return {
      healthy,
      version: PACKAGE_VERSION,
      uptime: Math.round(process.uptime()),
      shuttingDown: this.shuttingDown,
      activeSessions,
      plugins: pluginHealth,
      memory: memoryMb,
    };
  }
}
|
||||
@@ -0,0 +1,278 @@
|
||||
import crypto from 'node:crypto';
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
|
||||
import type { Request, Response, NextFunction } from 'express';
|
||||
|
||||
import { config } from '../config/index.js';
|
||||
import { logger } from '../utils/logger.js';
|
||||
import { sanitizeErrorMessage } from '../utils/errors.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Allowed hosts for DNS rebinding protection
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * `Host` header values accepted by the DNS rebinding guard: the loopback
 * names and address literals, each with and without the configured port.
 * The IPv6 loopback literal is included so that requests addressed to
 * `http://[::1]:<port>` are not rejected. All entries are lower-case.
 */
const allowedHosts = new Set<string>([
  '127.0.0.1',
  'localhost',
  '[::1]',
  `127.0.0.1:${config.port}`,
  `localhost:${config.port}`,
  `[::1]:${config.port}`,
]);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 1. DNS Rebinding Guard
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Reject requests whose `Host` header does not match an expected localhost
 * value.  This prevents DNS rebinding attacks from reaching the service when
 * it is bound to the loopback interface.
 *
 * Host names are case-insensitive, so the header is lower-cased before it is
 * looked up; `LOCALHOST:<port>` is accepted just like `localhost:<port>`.
 *
 * @param req - Incoming request; only its `Host` header is inspected.
 * @param res - Response; receives `403 { error: 'Forbidden' }` on rejection.
 * @param next - Called when the header is allowed.
 */
export function dnsRebindingGuard(
  req: Request,
  res: Response,
  next: NextFunction,
): void {
  const host = req.headers.host;
  const normalizedHost = host?.toLowerCase();

  if (!normalizedHost || !allowedHosts.has(normalizedHost)) {
    // Log the header exactly as received to aid diagnosis.
    logger.warn(
      { host, ip: req.ip, method: req.method, url: req.originalUrl },
      'DNS rebinding guard: blocked request with disallowed Host header',
    );
    res.status(403).json({ error: 'Forbidden' });
    return;
  }

  next();
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 2. Shutdown Guard (factory)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Build a middleware that turns requests away once graceful shutdown has
 * begun. The flag is read through a callback on every request, so the
 * middleware always sees the current state.
 *
 * @param getShuttingDown - Returns `true` while shutdown is in progress.
 * @returns Middleware that answers `503 { error: 'Server is shutting down' }`
 *   during shutdown and calls `next()` otherwise.
 */
export function shutdownGuard(
  getShuttingDown: () => boolean,
): (req: Request, res: Response, next: NextFunction) => void {
  return function rejectDuringShutdown(
    _req: Request,
    res: Response,
    next: NextFunction,
  ): void {
    if (!getShuttingDown()) {
      next();
      return;
    }

    res.status(503).json({ error: 'Server is shutting down' });
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 3. Error Handler
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Express error-handling middleware (four-argument signature).
 *
 * Logs the full error internally while returning a sanitized message to the
 * client so that internal filesystem paths, tokens, and stack traces are
 * never exposed.
 *
 * If the response has already started, no second response can be written;
 * the error is handed to Express's default handler instead.
 *
 * @param err - The error raised in the request pipeline.
 * @param req - The request being processed when the error occurred.
 * @param res - Response; receives `500 { error: <sanitized message> }`.
 * @param next - Used only to delegate when headers were already sent.
 */
export function errorHandler(
  err: Error,
  req: Request,
  res: Response,
  next: NextFunction,
): void {
  logger.error(
    { err, method: req.method, url: req.originalUrl },
    'Unhandled error in request pipeline',
  );

  // Writing status/body after the headers went out would throw; delegate to
  // the default handler, which closes the connection.
  if (res.headersSent) {
    next(err);
    return;
  }

  const message = sanitizeErrorMessage(err.message || 'Internal server error');

  res.status(500).json({ error: message });
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 4. Bearer Token Authentication
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** File name of the persisted API token, located inside `config.cookieDir`. */
const TOKEN_FILENAME = '.api-token';

/** In-memory copy of the API token; `null` until it has been loaded or generated. */
let cachedToken: string | null = null;
|
||||
|
||||
/**
 * Load or generate the Bearer API token.
 *
 * - On first start, generates a random 32-byte token (64 hex characters).
 * - Stores it at `config.cookieDir/.api-token` with 0o600 permissions.
 * - On subsequent starts, reads the existing token from disk; a stored value
 *   shorter than 32 characters is ignored and replaced.
 * - Prints the token to the console so the user can copy it.
 *
 * Must be called once during server startup. Later calls return the cached
 * token without touching the disk.
 *
 * @returns The API token.
 * @throws If a new token has to be generated and the token file cannot be written.
 */
export function initBearerToken(): string {
  if (cachedToken) return cachedToken;

  const tokenPath = path.join(config.cookieDir, TOKEN_FILENAME);

  /**
   * Best-effort: force owner-only permissions. The `mode` option of
   * writeFileSync only applies when the file is created, so an existing
   * file would otherwise keep whatever permissions it already had.
   */
  const restrictPermissions = (): void => {
    try {
      fs.chmodSync(tokenPath, 0o600);
    } catch (err: unknown) {
      logger.warn({ err }, 'Could not restrict permissions on API token file');
    }
  };

  // Ensure the directory exists. With `recursive: true` an existing directory
  // is not an error, so a failure here is genuine; log it and carry on — an
  // existing token may still be readable, and a write failure surfaces below.
  try {
    fs.mkdirSync(config.cookieDir, { recursive: true, mode: 0o700 });
  } catch (err: unknown) {
    logger.warn({ err }, 'Could not create API token directory');
  }

  // Try to read an existing token.
  try {
    const existing = fs.readFileSync(tokenPath, 'utf-8').trim();
    if (existing.length >= 32) {
      restrictPermissions();
      cachedToken = existing;
      logger.info('API Bearer token loaded from disk');
      // eslint-disable-next-line no-console
      console.log(`\n  REST API Bearer Token: ${cachedToken}\n`);
      return cachedToken;
    }
  } catch {
    // File does not exist or is unreadable — generate a new token.
  }

  // Generate a new token; cache it only after it has been persisted.
  const token = crypto.randomBytes(32).toString('hex');

  fs.writeFileSync(tokenPath, token + '\n', { mode: 0o600 });
  restrictPermissions();
  cachedToken = token;

  logger.info('New API Bearer token generated and saved');
  // eslint-disable-next-line no-console
  console.log(`\n  REST API Bearer Token: ${cachedToken}\n`);

  return cachedToken;
}
|
||||
|
||||
/**
 * Express middleware that checks the `Authorization: Bearer <token>` header
 * against the stored API token. The comparison uses
 * `crypto.timingSafeEqual` so it does not leak how many characters matched.
 *
 * Responses on failure:
 * - 401 `UNAUTHORIZED` — header missing or not starting with `Bearer `.
 * - 500 `INTERNAL` — the token has not been initialised.
 * - 403 `FORBIDDEN` — the presented token does not match.
 *
 * Apply to `/api/*` routes only.
 */
export function bearerAuth(
  req: Request,
  res: Response,
  next: NextFunction,
): void {
  /** Send the standard failure envelope with the given status. */
  const deny = (status: number, code: string, message: string): void => {
    res.status(status).json({
      success: false,
      error: { code, message },
    });
  };

  const scheme = 'Bearer ';
  const header = req.headers.authorization;

  if (typeof header !== 'string' || !header.startsWith(scheme)) {
    deny(401, 'UNAUTHORIZED', 'Missing or invalid Authorization header');
    return;
  }

  const presented = header.slice(scheme.length);

  if (!cachedToken) {
    deny(500, 'INTERNAL', 'API token not initialized');
    return;
  }

  // timingSafeEqual requires equal-length buffers, so lengths are compared first.
  const presentedBytes = Buffer.from(presented, 'utf-8');
  const expectedBytes = Buffer.from(cachedToken, 'utf-8');
  const tokensMatch =
    presentedBytes.length === expectedBytes.length &&
    crypto.timingSafeEqual(presentedBytes, expectedBytes);

  if (!tokensMatch) {
    deny(403, 'FORBIDDEN', 'Invalid Bearer token');
    return;
  }

  next();
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 5. Rate Limiter (in-memory, per-IP)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
interface RateLimiterOptions {
  /** Length of the sliding window, in milliseconds. */
  windowMs: number;
  /** How many requests one IP may make inside a single window. */
  maxRequests: number;
}

interface RateLimiterEntry {
  /** Arrival times (epoch ms) of the requests still inside the window. */
  timestamps: number[];
}

/**
 * Build an Express middleware that throttles requests per client IP using an
 * in-memory sliding window.
 *
 * A request is rejected with HTTP 429 (plus a `Retry-After` header, in whole
 * seconds) once the IP already has `opts.maxRequests` requests inside the
 * last `opts.windowMs` milliseconds. A background sweep runs every 60 seconds
 * and drops IPs with no recent requests so the map cannot grow without bound.
 *
 * @param opts Window length and per-window request budget.
 * @returns The middleware function.
 */
export function rateLimiter(opts: RateLimiterOptions) {
  const hitsByIp = new Map<string, RateLimiterEntry>();

  /** Keep only the timestamps that still fall inside the window ending at `now`. */
  const recentOnly = (stamps: number[], now: number): number[] =>
    stamps.filter((stamp) => now - stamp < opts.windowMs);

  // Background sweep: prune every record and forget IPs that went quiet.
  const sweeper = setInterval(() => {
    const now = Date.now();
    for (const [ip, record] of hitsByIp) {
      record.timestamps = recentOnly(record.timestamps, now);
      if (record.timestamps.length === 0) {
        hitsByIp.delete(ip);
      }
    }
  }, 60_000);

  // The sweep must never be the only thing keeping the process alive.
  if (typeof sweeper === 'object' && 'unref' in sweeper) {
    sweeper.unref();
  }

  return (req: Request, res: Response, next: NextFunction): void => {
    const ip = req.ip ?? req.socket.remoteAddress ?? 'unknown';
    const now = Date.now();

    let record = hitsByIp.get(ip);
    if (record === undefined) {
      record = { timestamps: [] };
      hitsByIp.set(ip, record);
    }

    // Slide the window forward before counting.
    record.timestamps = recentOnly(record.timestamps, now);

    if (record.timestamps.length >= opts.maxRequests) {
      // The budget frees up when the oldest recorded request leaves the window.
      const oldest = record.timestamps[0] ?? now;
      const waitMs = opts.windowMs - (now - oldest);
      const retryAfterSec = Math.ceil(waitMs / 1000);

      res.set('Retry-After', String(retryAfterSec));
      res.status(429).json({
        success: false,
        error: {
          code: 'RATE_LIMITED',
          message: `Too many requests. Try again in ${String(retryAfterSec)} seconds.`,
        },
      });
      return;
    }

    record.timestamps.push(now);
    next();
  };
}
|
||||
@@ -0,0 +1,242 @@
|
||||
import { open, stat, unlink, writeFile, mkdir } from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { randomUUID } from "node:crypto";
|
||||
|
||||
import { logger } from "./logger.js";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Size cap, in megabytes, applied when the caller does not supply one. */
const DEFAULT_MAX_SIZE_MB = 20;

/** Number of bytes in one megabyte (2^20). */
const BYTES_PER_MB = 1024 ** 2;

/**
 * How many leading bytes are read for format sniffing. Twelve is enough for
 * the longest signature checked in this file (WebP, which reads bytes 8-11).
 */
const MAGIC_BYTES_LEN = 12;

/**
 * Content-Type -> file extension lookup, consulted when a download URL does
 * not itself carry a usable extension.
 */
const MIME_TO_EXT: Record<string, string> = {
  "image/jpeg": ".jpg",
  "image/png": ".png",
  "image/webp": ".webp",
  "video/mp4": ".mp4",
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Magic-byte detection
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * ISO-BMFF major brands (bytes 8-11 of the file) that share MP4's `ftyp` box
 * layout but are still images or audio, not MP4 video.
 */
const NON_MP4_FTYP_BRANDS: ReadonlySet<string> = new Set([
  // HEIF / HEIC still images and image sequences
  "heic", "heix", "heim", "heis", "hevc", "hevx", "hevm", "hevs", "mif1", "msf1",
  // AVIF images and image sequences
  "avif", "avis",
  // MPEG-4 audio-only containers
  "M4A ", "M4B ", "M4P ",
]);

/**
 * Identify a media format from the leading bytes of a file.
 *
 * @param header The first bytes of the file (12 bytes are enough for every
 *   signature checked here; shorter buffers simply fail the longer checks).
 * @returns `"image/jpeg"`, `"image/png"`, `"image/webp"` or `"video/mp4"`,
 *   or `undefined` when no supported signature matches.
 */
function detectMimeType(header: Buffer): string | undefined {
  /** True when `header` contains exactly `signature` starting at `offset`. */
  const hasBytesAt = (signature: readonly number[], offset = 0): boolean =>
    signature.every((byte, i) => header[offset + i] === byte);

  // JPEG: starts with FF D8 FF
  if (hasBytesAt([0xff, 0xd8, 0xff])) {
    return "image/jpeg";
  }

  // PNG: starts with 89 50 4E 47
  if (hasBytesAt([0x89, 0x50, 0x4e, 0x47])) {
    return "image/png";
  }

  // WebP: RIFF....WEBP (bytes 0-3 = "RIFF", bytes 8-11 = "WEBP")
  if (
    hasBytesAt([0x52, 0x49, 0x46, 0x46]) &&
    hasBytesAt([0x57, 0x45, 0x42, 0x50], 8)
  ) {
    return "image/webp";
  }

  // MP4: "ftyp" at byte offset 4. The same box opens HEIC/AVIF images and
  // M4A audio, so the major brand that follows must not be one of those.
  if (hasBytesAt([0x66, 0x74, 0x79, 0x70], 4)) {
    const majorBrand = header.subarray(8, 12).toString("latin1");
    if (!NON_MP4_FTYP_BRANDS.has(majorBrand)) {
      return "video/mp4";
    }
  }

  return undefined;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// validateMediaPath
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Validate that a local media file exists, is within size limits, and is one
 * of the supported media types (JPEG, PNG, WebP, MP4).
 *
 * @param filePath Path to the file; relative paths are resolved against the
 *   current working directory.
 * @param opts.maxSizeMB Maximum allowed size in megabytes
 *   (defaults to `DEFAULT_MAX_SIZE_MB`).
 * @returns The resolved absolute path to the file.
 * @throws Error when the path contains a ".." segment, the file is missing,
 *   is not a regular file, is too large, is too small to sniff, or does not
 *   carry a supported signature.
 */
export async function validateMediaPath(
  filePath: string,
  opts?: { maxSizeMB?: number },
): Promise<string> {
  const resolved = path.resolve(filePath);

  // Guard against path traversal: reject the *original* input when any whole
  // path segment is "..". Only complete segments count, so ordinary file
  // names that merely contain two dots (e.g. "my..photo.jpg") are accepted.
  // Both "/" and "\" are treated as separators.
  const hasParentSegment = filePath.split(/[\\/]+/).includes("..");
  if (hasParentSegment) {
    throw new Error(
      `Path traversal detected: the path must not contain ".." segments`,
    );
  }

  // Existence & size check
  let stats: Awaited<ReturnType<typeof stat>>;
  try {
    stats = await stat(resolved);
  } catch {
    throw new Error(`File not found: ${resolved}`);
  }

  if (!stats.isFile()) {
    throw new Error(`Not a regular file: ${resolved}`);
  }

  const maxBytes = (opts?.maxSizeMB ?? DEFAULT_MAX_SIZE_MB) * BYTES_PER_MB;
  if (stats.size > maxBytes) {
    const sizeMB = (stats.size / BYTES_PER_MB).toFixed(2);
    const limitMB = (maxBytes / BYTES_PER_MB).toFixed(0);
    throw new Error(
      `File too large: ${sizeMB} MB exceeds the ${limitMB} MB limit`,
    );
  }

  // Files shorter than the sniffing window cannot be identified reliably.
  if (stats.size < MAGIC_BYTES_LEN) {
    throw new Error(`File too small to identify media type (${stats.size} bytes)`);
  }

  // MIME type check via magic bytes
  const fd = await open(resolved, "r");
  try {
    const buf = Buffer.alloc(MAGIC_BYTES_LEN);
    await fd.read(buf, 0, MAGIC_BYTES_LEN, 0);
    const mime = detectMimeType(buf);

    if (mime === undefined) {
      throw new Error(
        `Unsupported media type for file: ${resolved}. ` +
          `Supported types: JPEG, PNG, WebP, MP4`,
      );
    }

    logger.debug({ path: resolved, mime, bytes: stats.size }, "媒体文件校验通过");
  } finally {
    // Always release the descriptor, including on the unsupported-type throw.
    await fd.close();
  }

  return resolved;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// downloadFile
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** URL extensions that are trusted as-is because they name a supported media type. */
const KNOWN_MEDIA_EXTS: ReadonlySet<string> = new Set([
  ".jpg",
  ".jpeg",
  ".png",
  ".webp",
  ".mp4",
]);

/**
 * Derive a file extension for a download from its URL and Content-Type.
 *
 * Precedence:
 * 1. the URL pathname's extension when it is a recognised media extension;
 * 2. the extension mapped from the Content-Type header via `MIME_TO_EXT`;
 * 3. any other short (<= 5 chars including the dot) URL extension;
 * 4. the empty string.
 *
 * Step 2 ranks above step 3 so that a dynamic endpoint such as
 * `/fetch.php?id=1` serving `image/jpeg` yields `.jpg` rather than `.php`.
 *
 * @param url Source URL; an unparsable URL simply contributes no extension.
 * @param contentType Raw Content-Type header value, or `null` when absent.
 * @returns A lower-case extension including the leading dot, or `""`.
 */
function deriveExtension(url: string, contentType: string | null): string {
  // Extension taken from the URL pathname, if it is plausibly short.
  let urlExt = "";
  try {
    const candidate = path.extname(new URL(url).pathname).toLowerCase();
    if (candidate && candidate.length <= 5) {
      urlExt = candidate;
    }
  } catch {
    // URL parsing failed -- fall through to Content-Type.
  }

  if (KNOWN_MEDIA_EXTS.has(urlExt)) {
    return urlExt;
  }

  // Content-Type may carry parameters ("image/jpeg; charset=binary"); keep
  // only the media type itself.
  if (contentType) {
    const baseMime = contentType.split(";")[0]?.trim().toLowerCase();
    const mimeExt = baseMime ? MIME_TO_EXT[baseMime] : undefined;
    if (mimeExt) {
      return mimeExt;
    }
  }

  // Nothing better is known: keep whatever short extension the URL had.
  return urlExt;
}
|
||||
|
||||
/**
 * Download a file from a remote URL and save it into `destDir` with a random
 * filename.
 *
 * The URL is validated before anything touches the filesystem: only `http:`
 * and `https:` are accepted. The request is aborted when it (including
 * reading the body) takes longer than `opts.timeoutMs`, so a stalled server
 * cannot block the caller forever.
 *
 * @param url Remote http(s) URL to fetch.
 * @param destDir Directory to write into; created recursively when missing.
 * @param opts.timeoutMs Overall time budget in milliseconds (default 5 minutes).
 * @param opts.maxSizeMB Optional size cap in megabytes. When set, the download
 *   is rejected if the declared Content-Length or the received body exceeds
 *   it. No cap is applied when omitted.
 * @returns The absolute path to the downloaded file.
 * @throws Error on an invalid or non-http(s) URL, a non-2xx response, or an
 *   over-size body; the fetch rejection is propagated on timeout or network
 *   failure.
 */
export async function downloadFile(
  url: string,
  destDir: string,
  opts?: { timeoutMs?: number; maxSizeMB?: number },
): Promise<string> {
  // Validate the scheme first so that file:, data:, blob: etc. are rejected
  // before any directory is created or request is made.
  let parsedUrl: URL;
  try {
    parsedUrl = new URL(url);
  } catch {
    throw new Error(`Invalid download URL: ${url}`);
  }
  if (parsedUrl.protocol !== "http:" && parsedUrl.protocol !== "https:") {
    throw new Error(
      `Unsupported URL protocol "${parsedUrl.protocol}": only http(s) downloads are allowed`,
    );
  }

  const resolvedDir = path.resolve(destDir);

  // Ensure destination directory exists (recursive in case parents are missing).
  await mkdir(resolvedDir, { recursive: true });

  logger.debug({ url, destDir: resolvedDir }, "开始下载文件");

  const timeoutMs = opts?.timeoutMs ?? 5 * 60_000;
  const response = await fetch(url, { signal: AbortSignal.timeout(timeoutMs) });

  if (!response.ok) {
    throw new Error(
      `Download failed: HTTP ${String(response.status)} ${response.statusText} for ${url}`,
    );
  }

  const maxBytes =
    opts?.maxSizeMB === undefined ? undefined : opts.maxSizeMB * BYTES_PER_MB;

  // Cheap early rejection when the server declares an over-size body.
  if (maxBytes !== undefined) {
    const declaredBytes = Number(response.headers.get("content-length"));
    if (Number.isFinite(declaredBytes) && declaredBytes > maxBytes) {
      await response.body?.cancel();
      throw new Error(
        `Download too large: ${String(declaredBytes)} bytes exceeds the ${String(maxBytes)} byte limit`,
      );
    }
  }

  const contentType = response.headers.get("content-type");
  const ext = deriveExtension(url, contentType);
  const filename = `${randomUUID()}${ext}`;
  const destPath = path.join(resolvedDir, filename);

  const buffer = Buffer.from(await response.arrayBuffer());

  // Content-Length may be absent or wrong, so re-check the real size.
  if (maxBytes !== undefined && buffer.length > maxBytes) {
    throw new Error(
      `Download too large: ${String(buffer.length)} bytes exceeds the ${String(maxBytes)} byte limit`,
    );
  }

  // 0o600: readable and writable by the owning user only.
  await writeFile(destPath, buffer, { mode: 0o600 });

  logger.debug(
    { path: destPath, bytes: buffer.length, mime: contentType },
    "文件下载完成",
  );

  return destPath;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// cleanupFile
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Remove a file from disk.
 *
 * A file that is already missing (`ENOENT`) counts as success; every other
 * failure is rethrown to the caller unchanged.
 *
 * @param filePath Path of the file to delete.
 */
export async function cleanupFile(filePath: string): Promise<void> {
  try {
    await unlink(filePath);
    logger.debug({ path: filePath }, "临时文件已清理");
  } catch (err: unknown) {
    const alreadyGone =
      err instanceof Error && "code" in err && err.code === "ENOENT";

    // Anything other than "already gone" is a real failure.
    if (!alreadyGone) {
      throw err;
    }
  }
}
|
||||
@@ -0,0 +1,136 @@
|
||||
import { logger } from './logger.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Error classification
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Coarse buckets that a tool failure is sorted into before it is reported. */
export enum ErrorCategory {
  TIMEOUT = 'TIMEOUT',
  AUTH_REQUIRED = 'AUTH_REQUIRED',
  SELECTOR_NOT_FOUND = 'SELECTOR_NOT_FOUND',
  NETWORK = 'NETWORK',
  PLATFORM_ERROR = 'PLATFORM_ERROR',
  INTERNAL = 'INTERNAL',
}

// All markers are lower-case because they are matched against a lower-cased
// "<name> <message>" string.

/** Phrases that indicate an element lookup failed. */
const SELECTOR_MARKERS: readonly string[] = ['waiting for selector', '找不到元素'];

/** Phrases that indicate an operation ran out of time. */
const TIMEOUT_MARKERS: readonly string[] = ['timeout', 'timed out', 'etimedout'];

/**
 * Phrases that indicate a transport-level failure: Chromium `net::ERR_*`
 * codes, Node's `fetch failed` TypeError, and common socket/DNS error codes.
 */
const NETWORK_MARKERS: readonly string[] = [
  'net::err_',
  'fetch failed',
  'econnrefused',
  'econnreset',
  'enotfound',
  'eai_again',
];

/** Phrases that indicate the user has to log in. */
const AUTH_MARKERS: readonly string[] = ['login', '登录'];

/**
 * Classify an error by inspecting its `name` and `message`.
 *
 * Matching is a case-insensitive substring search; the first matching group
 * wins, in this order: selector, timeout, network, auth. Anything else is
 * `INTERNAL`. `PLATFORM_ERROR` is never produced by this function.
 *
 * @param err The error to classify.
 * @returns The matching category.
 */
export function classifyError(err: Error): ErrorCategory {
  const haystack = `${err.name} ${err.message}`.toLowerCase();
  const mentionsAny = (markers: readonly string[]): boolean =>
    markers.some((marker) => haystack.includes(marker));

  // Selector check BEFORE timeout: a message such as
  // "Timeout waiting for selector ..." contains both keywords, and the more
  // specific match must win.
  // NOTE(review): newer Playwright call logs may read "waiting for
  // locator(...)" instead — confirm whether that should map here as well.
  if (mentionsAny(SELECTOR_MARKERS)) {
    return ErrorCategory.SELECTOR_NOT_FOUND;
  }

  // A `TimeoutError` name is covered too, since the name is part of haystack.
  if (mentionsAny(TIMEOUT_MARKERS)) {
    return ErrorCategory.TIMEOUT;
  }

  if (mentionsAny(NETWORK_MARKERS)) {
    return ErrorCategory.NETWORK;
  }

  if (mentionsAny(AUTH_MARKERS)) {
    return ErrorCategory.AUTH_REQUIRED;
  }

  return ErrorCategory.INTERNAL;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Message sanitization
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Strip potentially sensitive or noisy information from an error message
 * before it is returned to the MCP client.
 *
 * - URLs (http(s)://...)                       -> [url]
 * - Windows paths (C:\dir\file or C:/dir/file) -> [path]
 * - Unix paths (/xxx/yyy/...)                  -> [path]
 * - Long hex strings (>= 32 chars)             -> [hash]
 * - Truncated to 200 characters
 *
 * @param message Raw error message.
 * @returns The redacted message, at most 200 characters long.
 */
export function sanitizeErrorMessage(message: string): string {
  let sanitized = message;

  // Replace URLs first so that the path regexes do not partially match them.
  sanitized = sanitized.replace(/https?:\/\/[^\s)'"]+/g, '[url]');

  // Replace Windows drive-letter paths. This runs before the Unix rule so
  // that "C:/a/b" becomes "[path]" rather than "C:[path]". The leading \b
  // keeps scheme-like text such as "file:/" from being taken for a drive.
  sanitized = sanitized.replace(
    /\b[A-Za-z]:[\\/](?:[^\s\\/]+[\\/])*[^\s\\/)'":]*/g,
    '[path]',
  );

  // Replace absolute file-system paths (Unix-style).
  sanitized = sanitized.replace(/\/(?:[^\s/]+\/)+[^\s/)'":]*/g, '[path]');

  // Replace long hexadecimal strings (session ids, hashes, tokens, etc.).
  sanitized = sanitized.replace(/[0-9a-fA-F]{32,}/g, '[hash]');

  // Truncate to 200 characters.
  if (sanitized.length > 200) {
    sanitized = sanitized.slice(0, 200);
  }

  return sanitized;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// MCP tool result type
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Shape of the value an MCP tool handler resolves to.
 *
 * `content` carries the text parts of the response; `isError`, when present
 * and true, marks the result as a failure.
 */
export type McpToolResult = {
  content: { type: 'text'; text: string }[];
  isError?: boolean;
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Error-handling wrapper
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Run an MCP tool handler and convert anything it throws into a structured
 * MCP error result instead of letting the exception escape.
 *
 * On failure the thrown value is normalised to an `Error`, classified with
 * `classifyError`, its message passed through `sanitizeErrorMessage`, and the
 * failure logged. The returned result has `isError: true` and a single text
 * part containing the JSON `{ tool, error, message }`.
 *
 * Usage:
 * ```ts
 * const result = await withErrorHandling('publish_post', async () => {
 *   // ... tool logic that returns McpToolResult
 * });
 * ```
 *
 * @param toolName Name of the tool, used in the log entry and the payload.
 * @param fn The handler to execute.
 * @returns The handler's own result, or the error result described above.
 */
export async function withErrorHandling(
  toolName: string,
  fn: () => Promise<McpToolResult>,
): Promise<McpToolResult> {
  try {
    const outcome = await fn();
    return outcome;
  } catch (thrown: unknown) {
    // Non-Error throwables (strings, plain objects) are wrapped so the rest
    // of the handling can rely on `.name` / `.message`.
    const failure = thrown instanceof Error ? thrown : new Error(String(thrown));

    const category = classifyError(failure);
    const safeMessage = sanitizeErrorMessage(failure.message);

    // The log keeps the original error; only the client payload is sanitized.
    logger.error({ tool: toolName, category, err: failure }, '工具执行失败');

    const text = JSON.stringify({
      tool: toolName,
      error: category,
      message: safeMessage,
    });

    return {
      content: [{ type: 'text', text }],
      isError: true,
    };
  }
}
|
||||
@@ -0,0 +1,68 @@
|
||||
import pino from "pino";
|
||||
|
||||
/** True only when NODE_ENV is exactly "production". */
const runtimeEnv = process.env["NODE_ENV"];
const isProduction = runtimeEnv === "production";

// Production only: drop the DEBUG variable so that Playwright debug output,
// which does not go through pino, stays silent.
if (isProduction) {
  delete process.env["DEBUG"];
}
|
||||
|
||||
/**
 * Property names whose values must never reach the log output, listed in the
 * order they appear in the final redact list.
 */
const sensitiveKeys: string[] = [
  // Auth & credentials
  "cookie",
  "cookies",
  "set-cookie",
  "authorization",
  "password",
  "secret",
  // Tokens
  "token",
  "xsec_token",
  "access_token",
  "refresh_token",
  // API keys
  "api_key",
  "apikey",
  // Sessions
  "sessionid",
  "session_id",
];

/**
 * Paths handed to pino's `redact` option: every sensitive key prefixed with
 * "**.", followed by two entries for Playwright StorageState structures.
 *
 * NOTE(review): confirm that the installed pino version accepts the "**"
 * prefix and the un-bracketed hyphenated key ("**.set-cookie"). pino's
 * documented redaction path syntax describes single-level "*" wildcards and
 * bracket notation for keys that are not plain identifiers — verify these
 * entries are neither rejected at startup nor silently ineffective.
 */
const redactPaths: string[] = [
  ...sensitiveKeys.map((key) => `**.${key}`),

  // Playwright StorageState structures
  "**.cookies[*].value",
  "**.origins[*].localStorage[*].value",
];
|
||||
|
||||
/**
 * Reduce an Error to the plain fields that are written to the log.
 *
 * Always emits `type` (the constructor name, or "Error" when unavailable)
 * and `message`. The stack trace is added only outside production.
 */
const errorSerializer = (err: Error): Record<string, unknown> => {
  const includeStack = !isProduction && Boolean(err.stack);

  return {
    type: err.constructor?.name ?? "Error",
    message: err.message,
    ...(includeStack ? { stack: err.stack } : {}),
  };
};
|
||||
|
||||
/**
 * Extra pino options for non-production runs: route output through
 * `pino-pretty`. Empty in production, where no transport is configured here.
 */
const devOnlyOptions = isProduction
  ? {}
  : {
      transport: {
        target: "pino-pretty",
      },
    };

/**
 * Shared application logger.
 *
 * - Level comes from `LOG_LEVEL`, defaulting to "info".
 * - Values at `redactPaths` are replaced with "[REDACTED]".
 * - Objects logged under `err` or `error` go through `errorSerializer`.
 */
export const logger: pino.Logger = pino({
  level: process.env["LOG_LEVEL"] ?? "info",
  redact: {
    paths: redactPaths,
    censor: "[REDACTED]",
  },
  serializers: {
    err: errorSerializer,
    error: errorSerializer,
  },
  ...devOnlyOptions,
});
|
||||
Reference in New Issue
Block a user