8da5f40c9f
多平台社交自动化 MCP 服务,首批支持小红书。 - 13 个 MCP 工具:登录管理、内容浏览、发布、互动 - 13 个 REST API 端点,支持 Bearer token 认证和限流 - BrowserManager:串行队列、背压、崩溃恢复 - Cookie 持久化:原子写入、0600 权限 - 安全:DNS rebinding 防御、错误脱敏、深层日志 redact - Docker 部署支持 - 28 个单元测试全部通过
375 lines
12 KiB
TypeScript
375 lines
12 KiB
TypeScript
import {
|
|
chromium,
|
|
type Browser,
|
|
type BrowserContext,
|
|
type Page,
|
|
} from 'rebrowser-playwright';
|
|
|
|
import { config } from '../config/index.js';
|
|
import { cookieStore } from '../cookie/store.js';
|
|
import { logger } from '../utils/logger.js';
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Constants
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Safety-net delay, in milliseconds, after which a page handed out by
 * `acquirePage()` is released automatically if the caller has not released
 * it yet (5 minutes).
 */
const ACQUIRE_SAFETY_TIMEOUT_MS = 5 * 60_000;
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// BrowserManager
|
|
// ---------------------------------------------------------------------------
|
|
|
|
export class BrowserManager {
  // -- State ----------------------------------------------------------------

  /** The browser currently tracked by this manager, or `null` if none. */
  private browser: Browser | null = null;

  /** Fully created contexts, keyed by platform identifier. */
  private contexts = new Map<string, BrowserContext>();

  /**
   * Context creations that are still in flight, keyed by platform.
   * Concurrent `getContext()` callers share the same promise so that only
   * one context is ever created per platform.
   */
  private pendingContexts = new Map<string, Promise<BrowserContext>>();

  /** Tail of the serialised `withPage` promise chain for each platform. */
  private queues = new Map<string, Promise<void>>();

  /** Number of queued or running `withPage` operations per platform. */
  private queueDepths = new Map<string, number>();

  /**
   * Mutex-style promise that prevents concurrent browser launches.
   * While a launch is in-flight every caller awaits the same promise.
   */
  private launchPromise: Promise<Browser> | null = null;

  // -- Public API -----------------------------------------------------------

  /**
   * Execute `fn` on a fresh page inside the platform-specific context.
   *
   * `withPage` operations are serialised per-platform through a promise
   * chain, so at most one `withPage` callback runs per platform at a time.
   * (Pages obtained through `acquirePage()` are not part of this queue.)
   * Back-pressure is enforced via `config.maxQueueDepth`.
   *
   * @param platform - Platform identifier (e.g. "twitter", "xiaohongshu").
   * @param fn - Async callback that receives the page.
   * @param timeoutMs - Optional timeout override (defaults to the
   *   `operationTimeouts.default` value from config, then to 60 seconds).
   * @returns The value returned by `fn`.
   * @throws Error if the platform queue is full, if the browser is not
   *   connected when the task starts, or if `fn` does not settle within the
   *   effective timeout. Rejections from `fn` are propagated unchanged.
   */
  async withPage<T>(
    platform: string,
    fn: (page: Page) => Promise<T>,
    timeoutMs?: number,
  ): Promise<T> {
    // -- Back-pressure check ------------------------------------------------
    const currentDepth = this.queueDepths.get(platform) ?? 0;
    if (currentDepth >= config.maxQueueDepth) {
      throw new Error(
        `Queue full for platform "${platform}" (depth=${currentDepth}, ` +
          `max=${config.maxQueueDepth}). Try again later.`,
      );
    }
    this.queueDepths.set(platform, currentDepth + 1);

    // -- Resolve effective timeout ------------------------------------------
    const effectiveTimeout =
      timeoutMs ?? config.operationTimeouts['default'] ?? 60_000;

    // -- Build the task and chain onto the per-platform queue ---------------
    const previous = this.queues.get(platform) ?? Promise.resolve();

    const task: Promise<T> = previous.then(async () => {
      const browser = await this.ensureBrowser();

      // The browser may have disconnected while this task was queued.
      if (!browser.isConnected()) {
        throw new Error('Browser disconnected while waiting in queue');
      }

      const ctx = await this.getContext(platform);
      const page = await ctx.newPage();
      const timeout = this.createTimeout<T>(effectiveTimeout, platform);

      try {
        page.setDefaultTimeout(effectiveTimeout);
        page.setDefaultNavigationTimeout(effectiveTimeout);

        // Race the user function against a hard timeout. If the timeout
        // wins, `fn` keeps running but its page is closed below.
        return await Promise.race<T>([fn(page), timeout.promise]);
      } finally {
        // Always stop the timer so a finished operation does not leave a
        // pending timeout (and its closure) behind.
        timeout.cancel();
        await page.close().catch((err: unknown) => {
          logger.warn({ err, platform }, 'Failed to close page');
        });
      }
    });

    // Swallow errors so the promise chain continues for the next caller.
    // The actual rejection is still returned to **this** caller via `task`.
    const chainContinuation = task.then(
      () => {},
      () => {},
    );

    // Bookkeeping once this task settles, regardless of outcome.
    void chainContinuation.finally(() => {
      const depth = this.queueDepths.get(platform) ?? 1;
      if (depth <= 1) {
        this.queueDepths.delete(platform);
      } else {
        this.queueDepths.set(platform, depth - 1);
      }

      // If nothing was chained after this task the queue is idle; drop the
      // settled tail so `drain()` only sees queues with real work.
      if (this.queues.get(platform) === chainContinuation) {
        this.queues.delete(platform);
      }
    });

    this.queues.set(platform, chainContinuation);

    return task;
  }

  /**
   * Acquire a page that the caller manages manually (e.g. for interactive
   * login flows). The caller **must** call `release()` when finished.
   *
   * The page is opened immediately: it does not go through the per-platform
   * `withPage` queue and is not counted against `config.maxQueueDepth`.
   *
   * A safety-net timer auto-releases the page after
   * `ACQUIRE_SAFETY_TIMEOUT_MS` to prevent resource leaks if the caller
   * forgets.
   *
   * @param platform - Platform identifier.
   * @returns Object with `page` and an idempotent `release` function.
   */
  async acquirePage(
    platform: string,
  ): Promise<{ page: Page; release: () => Promise<void> }> {
    await this.ensureBrowser();
    const ctx = await this.getContext(platform);
    const page = await ctx.newPage();

    let released = false;

    const release = async (): Promise<void> => {
      if (released) return;
      released = true;
      clearTimeout(safetyTimer);
      await page.close().catch((err: unknown) => {
        logger.warn({ err, platform }, 'Failed to close acquired page');
      });
    };

    const safetyTimer = setTimeout(() => {
      if (!released) {
        logger.warn(
          { platform },
          `acquirePage safety timeout: auto-releasing page after ${ACQUIRE_SAFETY_TIMEOUT_MS}ms`,
        );
        void release();
      }
    }, ACQUIRE_SAFETY_TIMEOUT_MS);

    // Prevent the timer from keeping the Node.js process alive.
    if (typeof safetyTimer === 'object' && 'unref' in safetyTimer) {
      safetyTimer.unref();
    }

    return { page, release };
  }

  /**
   * Save the current cookie / storage state of a platform's browser context
   * to disk via the CookieStore. Logs a warning and does nothing when no
   * context exists for the platform.
   *
   * @param platform - Platform identifier whose context should be persisted.
   */
  async saveCookies(platform: string): Promise<void> {
    const ctx = this.contexts.get(platform);
    if (!ctx) {
      logger.warn(
        { platform },
        'saveCookies called but no context exists for platform',
      );
      return;
    }

    const state = await ctx.storageState();
    // Playwright's storageState() return type is structurally compatible with
    // our CookieStore's StorageState interface.
    await cookieStore.save(platform, state);
    logger.debug({ platform }, 'Cookies saved');
  }

  /**
   * Wait for every platform queue that is in flight at the time of the call
   * to settle. Useful during graceful shutdown so that running operations
   * finish before teardown. Operations enqueued after the call starts are
   * not awaited.
   */
  async drain(): Promise<void> {
    const pending = [...this.queues.values()];
    if (pending.length === 0) return;

    logger.info(
      { queueCount: pending.length },
      'Draining browser operation queues',
    );

    await Promise.allSettled(pending);

    logger.info('All browser operation queues drained');
  }

  /**
   * Close all browser contexts and the browser itself, then reset internal
   * state. Safe to call multiple times. Errors while closing are logged and
   * never thrown.
   */
  async close(): Promise<void> {
    // Close every context individually so callers that need to flush
    // storageState can do so before calling close().
    const contextClosePromises = [...this.contexts.values()].map((ctx) =>
      ctx.close().catch((err: unknown) => {
        logger.warn({ err }, 'Error closing browser context during shutdown');
      }),
    );
    await Promise.all(contextClosePromises);

    // Detach the browser *before* closing it: the 'disconnected' handler
    // ignores browsers the manager no longer tracks, so an intentional
    // shutdown is not reported as an unexpected disconnect.
    const browser = this.browser;
    this.browser = null;

    if (browser) {
      await browser.close().catch((err: unknown) => {
        logger.warn({ err }, 'Error closing browser during shutdown');
      });
    }

    this.contexts.clear();
    this.pendingContexts.clear();
    this.queues.clear();
    this.queueDepths.clear();
    this.launchPromise = null;

    logger.info('BrowserManager closed');
  }

  // -- Private helpers ------------------------------------------------------

  /**
   * Ensure the browser is launched and connected. Uses a launch mutex so
   * that concurrent callers share a single launch attempt instead of
   * spawning multiple browser processes.
   */
  private async ensureBrowser(): Promise<Browser> {
    if (this.browser?.isConnected()) {
      return this.browser;
    }

    // If another caller is already launching, piggy-back on that promise.
    if (this.launchPromise) {
      return this.launchPromise;
    }

    this.launchPromise = this.launchBrowser();

    try {
      return await this.launchPromise;
    } finally {
      // Cleared on success and on failure so a failed launch can be retried.
      this.launchPromise = null;
    }
  }

  /**
   * Launch a Chromium instance via rebrowser-playwright and start tracking
   * it as the manager's current browser.
   */
  private async launchBrowser(): Promise<Browser> {
    logger.info(
      { headless: config.headless, browserBin: config.browserBin ?? 'default' },
      'Launching browser',
    );

    const browser = await chromium.launch({
      headless: config.headless,
      ...(config.browserBin ? { executablePath: config.browserBin } : {}),
    });

    // React to unexpected disconnects (e.g. browser crash, OOM kill).
    browser.on('disconnected', () => {
      // Ignore events from a browser we no longer track: either close()
      // detached it on purpose, or a newer browser has already replaced it
      // and its state must not be wiped by a late event.
      if (this.browser !== browser) return;

      logger.error('Browser disconnected unexpectedly');
      this.browser = null;
      this.contexts.clear();
      // launchPromise needs no reset here: ensureBrowser() clears it when
      // each launch settles, so the next call starts a fresh launch.
    });

    // A launch only happens when no connected browser is tracked, so any
    // context still cached belongs to a dead browser and must not be reused.
    this.contexts.clear();
    this.browser = browser;

    logger.info('Browser launched successfully');

    return browser;
  }

  /**
   * Get (or lazily create) a BrowserContext for the given platform.
   *
   * Concurrent callers for the same platform share one in-flight creation,
   * so a context is never created twice and then overwritten in the cache.
   *
   * @throws Error if no browser is currently launched.
   */
  private async getContext(platform: string): Promise<BrowserContext> {
    const existing = this.contexts.get(platform);
    if (existing) return existing;

    const pending = this.pendingContexts.get(platform);
    if (pending) return pending;

    // Capture the browser now; `this.browser` may change across awaits.
    const browser = this.browser;
    if (!browser) {
      throw new Error('Cannot create context: browser is not launched');
    }

    const creation = this.createContext(browser, platform);
    this.pendingContexts.set(platform, creation);

    try {
      return await creation;
    } finally {
      if (this.pendingContexts.get(platform) === creation) {
        this.pendingContexts.delete(platform);
      }
    }
  }

  /**
   * Create a new context on `browser` for `platform` and cache it.
   *
   * We attempt to restore cookies from the CookieStore so that sessions
   * survive process restarts; a failed load falls back to a fresh context.
   *
   * @throws Error if the browser disconnected or was replaced while the
   *   context was being created.
   */
  private async createContext(
    browser: Browser,
    platform: string,
  ): Promise<BrowserContext> {
    // Attempt to restore a previous session's storage state from disk.
    let storageState: Awaited<ReturnType<BrowserContext['storageState']>> | undefined;
    try {
      const loaded = await cookieStore.load(platform);
      if (loaded) {
        storageState = loaded;
        logger.debug(
          { platform, cookieCount: loaded.cookies.length },
          'Restoring saved cookies into new context',
        );
      }
    } catch (err: unknown) {
      // Cookie load failure should never prevent context creation.
      logger.warn(
        { err, platform },
        'Failed to load saved cookies -- creating fresh context',
      );
    }

    const ctx = await browser.newContext(
      storageState ? { storageState } : {},
    );

    // Do not cache a context whose browser died or was detached while we
    // were awaiting; it would be handed to every later caller.
    if (this.browser !== browser || !browser.isConnected()) {
      await ctx.close().catch((err: unknown) => {
        logger.debug({ err, platform }, 'Failed to close orphaned browser context');
      });
      throw new Error('Browser disconnected while creating context');
    }

    this.contexts.set(platform, ctx);

    logger.debug({ platform }, 'Browser context created');

    return ctx;
  }

  /**
   * Create the timeout arm used by `Promise.race` inside `withPage`.
   *
   * The timer is `unref()`'d so it cannot keep the Node.js event loop alive
   * during graceful shutdown.
   *
   * @param ms - Delay before the promise rejects.
   * @param platform - Platform identifier, used in the error message.
   * @returns `promise`, which rejects after `ms` milliseconds, and `cancel`,
   *   which stops the timer (after which `promise` never settles).
   */
  private createTimeout<T>(
    ms: number,
    platform: string,
  ): { promise: Promise<T>; cancel: () => void } {
    let cancel: () => void = () => {};

    // The executor runs synchronously, so `cancel` is set before we return.
    const promise = new Promise<T>((_resolve, reject) => {
      const timer = setTimeout(() => {
        reject(
          new Error(
            `Operation timed out after ${ms}ms for platform "${platform}"`,
          ),
        );
      }, ms);

      // Prevent the timeout from keeping the process alive during shutdown.
      if (typeof timer === 'object' && 'unref' in timer) {
        timer.unref();
      }

      cancel = () => clearTimeout(timer);
    });

    return { promise, cancel };
  }
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Singleton export
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/** Shared `BrowserManager` instance, constructed once when this module is loaded. */
export const browserManager = new BrowserManager();
|