重构为Monorepo:拆分xhs/xhh应用与core包并完成双服务部署改造

This commit is contained in:
2026-03-03 16:06:16 +08:00
parent ed7fbdd5c2
commit 2cbd6b28b2
84 changed files with 6332 additions and 7678 deletions
+395
View File
@@ -0,0 +1,395 @@
import {
chromium,
type Browser,
type BrowserContext,
type Page,
} from 'rebrowser-playwright';
import { config } from '../config/index.js';
import { cookieStore } from '../cookie/store.js';
import { logger } from '../utils/logger.js';
// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------
/** How long acquirePage() waits before auto-releasing the page (5 minutes). */
const ACQUIRE_SAFETY_TIMEOUT_MS = 5 * 60_000;
// ---------------------------------------------------------------------------
// BrowserManager
// ---------------------------------------------------------------------------
export class BrowserManager {
// -- State ----------------------------------------------------------------
private browser: Browser | null = null;
private contexts = new Map<string, BrowserContext>();
private queues = new Map<string, Promise<void>>();
private queueDepths = new Map<string, number>();
/**
* Mutex-style promise that prevents concurrent browser launches.
* While a launch is in-flight every caller awaits the same promise.
*/
private launchPromise: Promise<Browser> | null = null;
// -- Public API -----------------------------------------------------------
/**
* Execute `fn` on a fresh page inside the platform-specific context.
*
* Operations are serialised per-platform through a promise chain so that
* at most one page is active per platform at any time. Back-pressure is
* enforced via `config.maxQueueDepth`.
*
* @param platform - Platform identifier (e.g. "twitter", "xiaohongshu").
* @param fn - Async callback that receives the page.
* @param timeoutMs - Optional timeout override (defaults to the
* `operationTimeouts.default` value from config).
* @returns The value returned by `fn`.
*/
async withPage<T>(
platform: string,
fn: (page: Page) => Promise<T>,
timeoutMs?: number,
): Promise<T> {
// -- Back-pressure check ------------------------------------------------
const currentDepth = this.queueDepths.get(platform) ?? 0;
if (currentDepth >= config.maxQueueDepth) {
throw new Error(
`Queue full for platform "${platform}" (depth=${currentDepth}, ` +
`max=${config.maxQueueDepth}). Try again later.`,
);
}
this.queueDepths.set(platform, currentDepth + 1);
// -- Resolve effective timeout ------------------------------------------
const effectiveTimeout =
timeoutMs ?? config.operationTimeouts['default'] ?? 60_000;
// -- Build the task and chain onto the per-platform queue ---------------
const previous = this.queues.get(platform) ?? Promise.resolve();
const task: Promise<T> = previous.then(async () => {
const browser = await this.ensureBrowser();
// The browser may have disconnected while this task was queued.
if (!browser.isConnected()) {
throw new Error('Browser disconnected while waiting in queue');
}
const ctx = await this.getContext(platform);
const page = await ctx.newPage();
page.setDefaultTimeout(effectiveTimeout);
page.setDefaultNavigationTimeout(effectiveTimeout);
try {
// Race the user function against a hard timeout. clearTimeout
// is handled implicitly: when fn resolves first the timeout
// promise is simply abandoned and its timer unref'd so it cannot
// keep the process alive.
const result = await Promise.race<T>([
fn(page),
this.createTimeout<T>(effectiveTimeout, platform),
]);
return result;
} finally {
await page.close().catch((err: unknown) => {
logger.warn({ err, platform }, 'Failed to close page');
});
}
});
// Swallow errors so the promise chain continues for the next caller.
// The actual rejection is still returned to **this** caller via `task`.
const chainContinuation = task.then(
() => {},
() => {},
);
// Decrement queue depth when this task settles, regardless of outcome.
void chainContinuation.finally(() => {
const depth = this.queueDepths.get(platform) ?? 1;
if (depth <= 1) {
this.queueDepths.delete(platform);
} else {
this.queueDepths.set(platform, depth - 1);
}
});
this.queues.set(platform, chainContinuation);
return task;
}
/**
* Acquire a page that the caller manages manually (e.g. for interactive
* login flows). The caller **must** call `release()` when finished.
*
* A safety-net timer auto-releases the page after 5 minutes to prevent
* resource leaks if the caller forgets.
*
* @param platform - Platform identifier.
* @returns Object with `page` and an idempotent `release` function.
*/
async acquirePage(
platform: string,
): Promise<{ page: Page; release: () => Promise<void> }> {
await this.ensureBrowser();
const ctx = await this.getContext(platform);
const page = await ctx.newPage();
let released = false;
const release = async (): Promise<void> => {
if (released) return;
released = true;
clearTimeout(safetyTimer);
await page.close().catch((err: unknown) => {
logger.warn({ err, platform }, 'Failed to close acquired page');
});
};
const safetyTimer = setTimeout(() => {
if (!released) {
logger.warn(
{ platform },
`acquirePage safety timeout: auto-releasing page after ${ACQUIRE_SAFETY_TIMEOUT_MS}ms`,
);
void release();
}
}, ACQUIRE_SAFETY_TIMEOUT_MS);
// Prevent the timer from keeping the Node.js process alive.
if (typeof safetyTimer === 'object' && 'unref' in safetyTimer) {
safetyTimer.unref();
}
return { page, release };
}
/**
* Save the current cookie / storage state of a platform's browser context
* to disk via the CookieStore.
*
* @param platform - Platform identifier whose context should be persisted.
*/
async saveCookies(platform: string): Promise<void> {
const ctx = this.contexts.get(platform);
if (!ctx) {
logger.warn(
{ platform },
'saveCookies called but no context exists for platform',
);
return;
}
const state = await ctx.storageState();
// Playwright's storageState() return type is structurally compatible with
// our CookieStore's StorageState interface.
await cookieStore.save(platform, state);
logger.debug({ platform }, 'Cookies saved');
}
/**
* Close and discard the browser context for a platform so the next
* operation creates a fresh one (without any cookies or storage).
*/
async clearContext(platform: string): Promise<void> {
const ctx = this.contexts.get(platform);
if (!ctx) return;
this.contexts.delete(platform);
await ctx.close().catch((err: unknown) => {
logger.warn({ err, platform }, 'Error closing context during clearContext');
});
logger.info({ platform }, 'Browser context cleared');
}
/**
* Wait for every in-flight platform queue to settle. Useful during
* graceful shutdown so that running operations finish before teardown.
*/
async drain(): Promise<void> {
const pending = [...this.queues.values()];
if (pending.length === 0) return;
logger.info(
{ queueCount: pending.length },
'Draining browser operation queues',
);
await Promise.allSettled(pending);
logger.info('All browser operation queues drained');
}
/**
* Close all browser contexts and the browser itself, then reset internal
* state. Safe to call multiple times.
*/
async close(): Promise<void> {
// Close every context individually so callers that need to flush
// storageState can do so before calling close().
const contextClosePromises = [...this.contexts.values()].map((ctx) =>
ctx.close().catch((err: unknown) => {
logger.warn({ err }, 'Error closing browser context during shutdown');
}),
);
await Promise.all(contextClosePromises);
if (this.browser) {
await this.browser.close().catch((err: unknown) => {
logger.warn({ err }, 'Error closing browser during shutdown');
});
}
this.browser = null;
this.contexts.clear();
this.queues.clear();
this.queueDepths.clear();
this.launchPromise = null;
logger.info('BrowserManager closed');
}
// -- Private helpers ------------------------------------------------------
/**
* Ensure the browser is launched and connected. Uses a launch mutex so
* that concurrent callers share a single launch attempt instead of
* spawning multiple browser processes.
*/
private async ensureBrowser(): Promise<Browser> {
if (this.browser?.isConnected()) {
return this.browser;
}
// If another caller is already launching, piggy-back on that promise.
if (this.launchPromise) {
return this.launchPromise;
}
this.launchPromise = this.launchBrowser();
try {
const browser = await this.launchPromise;
return browser;
} finally {
this.launchPromise = null;
}
}
/**
* Launch a Chromium instance via rebrowser-playwright.
*/
private async launchBrowser(): Promise<Browser> {
logger.info(
{ headless: config.headless, browserBin: config.browserBin ?? 'default' },
'Launching browser',
);
const browser = await chromium.launch({
headless: config.headless,
...(config.browserBin ? { executablePath: config.browserBin } : {}),
args: [
'--no-sandbox', // Required in Docker (no user namespaces)
'--disable-setuid-sandbox',
'--disable-dev-shm-usage', // Use /tmp instead of /dev/shm
'--disable-gpu',
],
});
// React to unexpected disconnects (e.g. browser crash, OOM kill).
browser.on('disconnected', () => {
logger.error('Browser disconnected unexpectedly');
this.browser = null;
this.contexts.clear();
// launchPromise is intentionally NOT cleared here so the next caller
// that calls ensureBrowser() will attempt a fresh launch.
});
this.browser = browser;
logger.info('Browser launched successfully');
return browser;
}
/**
* Get (or lazily create) a BrowserContext for the given platform.
*
* On first creation we attempt to restore cookies from the CookieStore
* so that sessions survive process restarts.
*/
private async getContext(platform: string): Promise<BrowserContext> {
const existing = this.contexts.get(platform);
if (existing) return existing;
if (!this.browser) {
throw new Error('Cannot create context: browser is not launched');
}
// Attempt to restore a previous session's storage state from disk.
let storageState: Awaited<ReturnType<BrowserContext['storageState']>> | undefined;
try {
const loaded = await cookieStore.load(platform);
if (loaded) {
storageState = loaded;
logger.debug(
{ platform, cookieCount: loaded.cookies.length },
'Restoring saved cookies into new context',
);
}
} catch (err: unknown) {
// Cookie load failure should never prevent context creation.
logger.warn(
{ err, platform },
'Failed to load saved cookies -- creating fresh context',
);
}
const ctx = await this.browser.newContext(
storageState ? { storageState } : {},
);
this.contexts.set(platform, ctx);
logger.debug({ platform }, 'Browser context created');
return ctx;
}
/**
* Create a promise that rejects after `ms` milliseconds, used as the
* timeout arm in `Promise.race` inside `withPage`.
*
* The timer is `unref()`'d so it cannot keep the Node.js event loop alive
* during graceful shutdown. When the user's function wins the race the
* dangling timeout promise is harmlessly garbage-collected.
*/
private createTimeout<T>(ms: number, platform: string): Promise<T> {
return new Promise<T>((_resolve, reject) => {
const timer = setTimeout(() => {
reject(
new Error(
`Operation timed out after ${ms}ms for platform "${platform}"`,
),
);
}, ms);
// Prevent the timeout from keeping the process alive during shutdown.
if (typeof timer === 'object' && 'unref' in timer) {
timer.unref();
}
});
}
}
// ---------------------------------------------------------------------------
// Singleton export
// ---------------------------------------------------------------------------
export const browserManager = new BrowserManager();
+121
View File
@@ -0,0 +1,121 @@
import os from 'node:os';
import path from 'node:path';
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
function envString(key: string, fallback: string): string {
return process.env[key] ?? fallback;
}
function envInt(key: string, fallback: number): number {
const raw = process.env[key];
if (raw === undefined) return fallback;
const parsed = Number.parseInt(raw, 10);
if (Number.isNaN(parsed)) {
// eslint-disable-next-line no-console
console.error(`[config] Invalid integer for ${key}="${raw}", using default ${fallback}`);
return fallback;
}
return parsed;
}
function envBool(key: string, fallback: boolean): boolean {
const raw = process.env[key];
if (raw === undefined) return fallback;
// Accept common truthy / falsy strings
if (['true', '1', 'yes'].includes(raw.toLowerCase())) return true;
if (['false', '0', 'no'].includes(raw.toLowerCase())) return false;
return fallback;
}
// ---------------------------------------------------------------------------
// HOST safety check — must run before exporting config
// ---------------------------------------------------------------------------
const host = envString('HOST', '127.0.0.1');
if (host === '0.0.0.0' || host === '::') {
const allow = process.env['ALLOW_REMOTE'];
if (allow !== 'yes-i-understand-the-risk') {
// Use console.error directly — the logger module depends on config,
// so it is not available yet at this point.
// eslint-disable-next-line no-console
console.error(
`[FATAL] HOST is set to "${host}" which exposes the service to the network.\n` +
`If you really intend to do this, set ALLOW_REMOTE=yes-i-understand-the-risk\n` +
`Refusing to start.`,
);
process.exit(1);
}
}
// ---------------------------------------------------------------------------
// Operation timeouts (milliseconds)
// Matches the tiers described in PLAN.md section 6.1
// ---------------------------------------------------------------------------
const operationTimeouts: Record<string, number> = {
like: 15_000, // 15s — quick interactions
favorite: 15_000, // 15s
comment: 20_000, // 20s
reply: 20_000, // 20s
feed_list: 30_000, // 30s — page load + extraction
search: 30_000, // 30s
feed_detail: 60_000, // 60s — includes scroll loading
user_profile: 60_000, // 60s
publish: 300_000, // 5min — upload may be slow
login: 300_000, // 5min — user interaction
default: 60_000, // 1min — fallback
};
// ---------------------------------------------------------------------------
// Config type
// ---------------------------------------------------------------------------
export interface AppConfig {
/** HTTP port */
port: number;
/** HTTP bind address */
host: string;
/** Run browser in headless mode */
headless: boolean;
/** Custom browser executable path (optional) */
browserBin: string | undefined;
/** Pino log level */
logLevel: string;
/** NODE_ENV */
nodeEnv: string;
/** Directory for per-platform cookie storage */
cookieDir: string;
/** Max pending operations per platform queue */
maxQueueDepth: number;
/** Per-operation-type timeout in ms */
operationTimeouts: Record<string, number>;
/** Whether the XHS notification poller is enabled */
notificationPollEnabled: boolean;
/** Poll interval for XHS notifications (ms) */
notificationPollIntervalMs: number;
/** Max notifications fetched per poll */
notificationPollMaxCount: number;
}
// ---------------------------------------------------------------------------
// Exported config singleton
// ---------------------------------------------------------------------------
export const config: AppConfig = {
port: envInt('PORT', 9527),
host,
headless: envBool('HEADLESS', true),
browserBin: process.env['BROWSER_BIN'] || undefined,
logLevel: envString('LOG_LEVEL', 'info'),
nodeEnv: envString('NODE_ENV', 'development'),
cookieDir: envString('COOKIE_DIR', path.join(os.homedir(), '.social-mcp-xhs')),
maxQueueDepth: envInt('MAX_QUEUE_DEPTH', 10),
operationTimeouts,
notificationPollEnabled: envBool('XHS_NOTIFICATION_POLL_ENABLED', true),
notificationPollIntervalMs: Math.max(15, envInt('XHS_NOTIFICATION_POLL_INTERVAL_SEC', 60)) * 1000,
notificationPollMaxCount: Math.max(1, Math.min(50, envInt('XHS_NOTIFICATION_POLL_MAX_COUNT', 20))),
};
+171
View File
@@ -0,0 +1,171 @@
import fs from 'node:fs/promises';
import path from 'node:path';
import { config } from '../config/index.js';
import { logger } from '../utils/logger.js';
// ---------------------------------------------------------------------------
// Types — mirrors Playwright's BrowserContext.storageState() shape
// ---------------------------------------------------------------------------
export interface Cookie {
name: string;
value: string;
domain: string;
path: string;
expires: number;
httpOnly: boolean;
secure: boolean;
sameSite: 'Strict' | 'Lax' | 'None';
}
export interface StorageState {
cookies: Cookie[];
origins: Array<{
origin: string;
localStorage: Array<{ name: string; value: string }>;
}>;
}
// ---------------------------------------------------------------------------
// CookieStore
// ---------------------------------------------------------------------------
const log = logger.child({ module: 'cookie-store' });
export class CookieStore {
/**
* Return the absolute path to the cookies.json for a given platform.
*/
getPath(platform: string): string {
return path.join(config.cookieDir, platform, 'cookies.json');
}
/**
* Load the persisted storage state for a platform.
* Returns `null` when no cookie file exists yet.
*/
async load(platform: string): Promise<StorageState | null> {
const filePath = this.getPath(platform);
try {
const raw = await fs.readFile(filePath, 'utf-8');
const parsed: unknown = JSON.parse(raw);
// Minimal structural validation so we don't blindly trust disk data.
if (!isStorageState(parsed)) {
log.warn({ platform, filePath }, 'Cookie file failed validation, treating as absent');
return null;
}
log.debug({ platform, cookieCount: parsed.cookies.length }, 'Loaded cookies from disk');
return parsed;
} catch (err: unknown) {
if (isNodeError(err) && err.code === 'ENOENT') {
log.debug({ platform }, 'No cookie file found');
return null;
}
log.error({ err, platform, filePath }, 'Failed to load cookie file');
throw err;
}
}
/**
* Persist a storage state for a platform using an atomic write.
*
* Strategy:
* 1. Ensure the platform directory exists (mode 0o700).
* 2. Write to a temporary file (`.tmp.<pid>`) inside the same directory.
* 3. Set file permissions to 0o600.
* 4. Atomically rename the temp file to the final path.
*
* Because rename is atomic on the same filesystem, readers will never
* observe a partially-written cookies.json.
*/
async save(platform: string, state: StorageState): Promise<void> {
const filePath = this.getPath(platform);
const dir = path.dirname(filePath);
const tmpPath = path.join(dir, `.tmp.${process.pid}`);
try {
// Ensure directory exists with restricted permissions.
await fs.mkdir(dir, { recursive: true, mode: 0o700 });
const json = JSON.stringify(state, null, 2);
// Write to temp file, set permissions, then atomically rename.
await fs.writeFile(tmpPath, json, { encoding: 'utf-8', mode: 0o600 });
await fs.rename(tmpPath, filePath);
log.debug(
{ platform, cookieCount: state.cookies.length },
'Saved cookies to disk',
);
} catch (err: unknown) {
log.error({ err, platform, filePath }, 'Failed to save cookie file');
// Best-effort cleanup of the temp file.
try {
await fs.unlink(tmpPath);
} catch {
// Ignore — the temp file may not have been created.
}
throw err;
}
}
/**
* Delete the cookie file for a platform.
* Silently succeeds when no file exists.
*/
async delete(platform: string): Promise<void> {
const filePath = this.getPath(platform);
try {
await fs.unlink(filePath);
log.debug({ platform }, 'Deleted cookie file');
} catch (err: unknown) {
if (isNodeError(err) && err.code === 'ENOENT') {
log.debug({ platform }, 'Cookie file already absent, nothing to delete');
return;
}
log.error({ err, platform, filePath }, 'Failed to delete cookie file');
throw err;
}
}
}
// ---------------------------------------------------------------------------
// Singleton
// ---------------------------------------------------------------------------
export const cookieStore = new CookieStore();
// ---------------------------------------------------------------------------
// Internal helpers
// ---------------------------------------------------------------------------
interface NodeError extends Error {
code?: string;
}
function isNodeError(err: unknown): err is NodeError {
return err instanceof Error;
}
/**
* Lightweight runtime check that the parsed JSON matches the StorageState
* shape we expect. This is intentionally lenient — we only verify the
* top-level structure so that forward-compatible fields are not rejected.
*/
function isStorageState(value: unknown): value is StorageState {
if (typeof value !== 'object' || value === null) return false;
const obj = value as Record<string, unknown>;
if (!Array.isArray(obj['cookies'])) return false;
if (!Array.isArray(obj['origins'])) return false;
return true;
}
+535
View File
@@ -0,0 +1,535 @@
import http from 'node:http';
import { randomUUID } from 'node:crypto';
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js';
import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
import express from 'express';
import { config } from '../config/index.js';
import { BrowserManager, browserManager } from '../browser/manager.js';
import { logger } from '../utils/logger.js';
import {
shutdownGuard,
errorHandler,
bearerAuth,
initBearerToken,
} from './middleware.js';
// ---------------------------------------------------------------------------
// Package version -- read once at module load for the /health endpoint and
// the MCP server info. Uses a static string so we avoid importing JSON
// (which would require `resolveJsonModule` + ESM assertion gymnastics).
// ---------------------------------------------------------------------------
const PACKAGE_VERSION = '0.1.0';
// ---------------------------------------------------------------------------
// PlatformPlugin interface
// ---------------------------------------------------------------------------
/**
* Contract that every platform adapter (Twitter, Xiaohongshu, etc.) must
* implement to plug into the application.
*
* - `registerTools` is the minimum requirement: expose MCP tools.
* - Optional hooks allow the plugin to mount REST routes, perform async
* initialisation, clean up on shutdown, and report its health.
*/
export interface PlatformPlugin {
/** Human-readable name used in logs and health-check output. */
name: string;
/** REST namespace mounted at `/api/{apiNamespace}` when registerRoutes exists. */
apiNamespace?: string;
/** Register MCP tools on the shared McpServer instance. */
registerTools(server: McpServer, browser: BrowserManager): void;
/** Optionally mount Express routes (e.g. OAuth callbacks, webhooks). */
registerRoutes?(router: express.Router, browser: BrowserManager): void;
/** Async initialisation (cookie restore, feature detection, etc.). */
init?(): Promise<void>;
/** Teardown hook called during graceful shutdown. */
shutdown?(): Promise<void>;
/** Return platform-specific health information. */
healthCheck?(): Promise<{ healthy: boolean; message?: string }>;
}
// ---------------------------------------------------------------------------
// AppServer
// ---------------------------------------------------------------------------
export class AppServer {
// -- Public surface -------------------------------------------------------
/** The underlying Express application -- use for plugin route mounting. */
readonly app: express.Express;
/** The MCP server instance -- use for plugin tool registration. */
readonly mcpServer: McpServer;
// -- Internal state -------------------------------------------------------
private httpServer: http.Server | null = null;
private shuttingDown = false;
private readonly plugins: PlatformPlugin[] = [];
/** SSE transports keyed by session ID for the deprecated `/sse` + `/messages` flow. */
private readonly sseTransports = new Map<string, SSEServerTransport>();
/** Per-session MCP servers backing active SSE sessions. */
private readonly sseSessionServers = new Map<string, McpServer>();
/** Streamable HTTP transports keyed by MCP session ID. */
private readonly streamableTransports = new Map<string, StreamableHTTPServerTransport>();
/** Per-session MCP servers backing active Streamable HTTP sessions. */
private readonly streamableSessionServers = new Map<string, McpServer>();
// -- Constructor ----------------------------------------------------------
constructor() {
// 1. Express app
this.app = express();
// 2. Security & availability middleware
this.app.use(shutdownGuard(() => this.shuttingDown));
// 3. MCP server
this.mcpServer = new McpServer(
{ name: 'social-mcp', version: PACKAGE_VERSION },
);
// 4. MCP transport endpoints (BEFORE body parsing — MCP SDK reads raw body)
this.setupStreamableHttpEndpoint();
this.setupSseEndpoints();
// 5. Body parsing for non-MCP routes
this.app.use(express.json());
// 6. Health endpoint
this.setupHealthEndpoint();
// 7. Bearer token auth for /api/* routes
initBearerToken();
this.app.use('/api', bearerAuth);
// 7. Error handler (must be registered last -- re-registered after plugins)
this.app.use(errorHandler);
}
// -- Plugin registration --------------------------------------------------
/**
* Register a platform plugin. Call this **before** `start()` so that all
* tools and routes are wired up before the server begins accepting
* connections.
*/
registerPlugin(plugin: PlatformPlugin): void {
logger.info({ plugin: plugin.name }, 'Registering platform plugin');
plugin.registerTools(this.mcpServer, browserManager);
if (plugin.registerRoutes) {
const router = express.Router();
plugin.registerRoutes(router, browserManager);
const apiNamespace = plugin.apiNamespace ?? plugin.name;
this.app.use(`/api/${apiNamespace}`, router);
}
this.plugins.push(plugin);
}
// -- Lifecycle ------------------------------------------------------------
/**
* Initialise all plugins and start listening for HTTP connections on
* `config.host:config.port`.
*
* Returns a promise that resolves once the server is ready.
*/
async start(): Promise<void> {
// Initialise plugins (sequentially so order is deterministic).
for (const plugin of this.plugins) {
if (plugin.init) {
logger.info({ plugin: plugin.name }, 'Initialising plugin');
await plugin.init();
}
}
// Re-register the error handler so it sits after any plugin routes.
this.app.use(errorHandler);
return new Promise<void>((resolve, reject) => {
this.httpServer = this.app
.listen(config.port, config.host, () => {
logger.info(
{ host: config.host, port: config.port },
'AppServer listening',
);
resolve();
})
.on('error', (err: Error) => {
reject(err);
});
});
}
/**
* Initiate graceful shutdown:
* 1. Set the shutting-down flag (new requests get 503).
* 2. Shut down every plugin.
* 3. Close all active MCP transports and servers.
* 4. Close the HTTP server.
*/
async close(): Promise<void> {
if (this.shuttingDown) return;
this.shuttingDown = true;
logger.info('AppServer shutting down');
// Shut down plugins
for (const plugin of this.plugins) {
if (plugin.shutdown) {
try {
await plugin.shutdown();
} catch (err: unknown) {
logger.warn({ err, plugin: plugin.name }, 'Error shutting down plugin');
}
}
}
// Close Streamable HTTP transports first.
for (const [sessionId, transport] of this.streamableTransports) {
try {
await transport.close();
} catch (err: unknown) {
logger.warn({ err, sessionId }, 'Error closing Streamable HTTP transport');
}
}
this.streamableTransports.clear();
for (const [sessionId, sessionServer] of this.streamableSessionServers) {
try {
await sessionServer.close();
} catch (err: unknown) {
logger.warn({ err, sessionId }, 'Error closing Streamable HTTP MCP server');
}
}
this.streamableSessionServers.clear();
// Close deprecated SSE transports.
for (const [sessionId, transport] of this.sseTransports) {
try {
await transport.close();
} catch (err: unknown) {
logger.warn({ err, sessionId }, 'Error closing SSE transport');
}
}
this.sseTransports.clear();
for (const [sessionId, sessionServer] of this.sseSessionServers) {
try {
await sessionServer.close();
} catch (err: unknown) {
logger.warn({ err, sessionId }, 'Error closing SSE MCP server');
}
}
this.sseSessionServers.clear();
// Close the MCP server
try {
await this.mcpServer.close();
} catch (err: unknown) {
logger.warn({ err }, 'Error closing MCP server');
}
// Close the HTTP server
if (this.httpServer) {
await new Promise<void>((resolve) => {
this.httpServer!.close(() => {
resolve();
});
});
this.httpServer = null;
}
logger.info('AppServer shut down complete');
}
// -- Private: Streamable HTTP endpoint ------------------------------------
private setupStreamableHttpEndpoint(): void {
this.app.all('/mcp', (req, res) => {
void this.handleStreamableHttpRequest(req, res);
});
}
private async handleStreamableHttpRequest(
req: express.Request,
res: express.Response,
): Promise<void> {
const headerSessionId = this.getFirstHeaderValue(req.headers['mcp-session-id']);
let transport = headerSessionId
? this.streamableTransports.get(headerSessionId)
: undefined;
if (!transport && headerSessionId) {
if (this.sseTransports.has(headerSessionId)) {
this.sendProtocolMismatch(
res,
'Bad Request: Session exists but uses the deprecated SSE transport protocol',
);
return;
}
res.status(404).json({
jsonrpc: '2.0',
error: {
code: -32001,
message: 'Session not found',
},
id: null,
});
return;
}
let createdTransport = false;
let sessionServer: McpServer | null = null;
if (!transport) {
createdTransport = true;
const newTransport = new StreamableHTTPServerTransport({
sessionIdGenerator: () => randomUUID(),
onsessioninitialized: (sessionId) => {
logger.info({ sessionId }, 'Streamable HTTP session initialized');
this.streamableTransports.set(sessionId, newTransport);
if (sessionServer) {
this.streamableSessionServers.set(sessionId, sessionServer);
}
},
});
transport = newTransport;
newTransport.onclose = () => {
const sessionId = newTransport.sessionId;
if (!sessionId) return;
logger.info({ sessionId }, 'Streamable HTTP session closed');
this.cleanupStreamableSession(sessionId);
};
sessionServer = this.createSessionMcpServer();
await sessionServer.connect(newTransport);
}
const activeTransport = transport;
try {
await activeTransport.handleRequest(req, res, req.body);
} catch (err: unknown) {
logger.error(
{ err, sessionId: headerSessionId ?? activeTransport.sessionId },
'Error handling /mcp request',
);
if (!res.headersSent) {
res.status(500).json({
jsonrpc: '2.0',
error: {
code: -32603,
message: 'Internal server error',
},
id: null,
});
}
} finally {
// If no session was initialized (bad request / handshake failure), avoid leaks.
if (createdTransport && !activeTransport.sessionId) {
try {
await activeTransport.close();
} catch (err: unknown) {
logger.warn({ err }, 'Error closing ephemeral Streamable HTTP transport');
}
if (sessionServer) {
try {
await sessionServer.close();
} catch (err: unknown) {
logger.warn({ err }, 'Error closing ephemeral Streamable HTTP MCP server');
}
}
}
}
}
// -- Private: SSE endpoints -----------------------------------------------
private setupSseEndpoints(): void {
// GET /sse -- establish a new SSE connection
this.app.get('/sse', (req, res) => {
logger.debug({ ip: req.ip }, 'New SSE connection request');
const transport = new SSEServerTransport('/messages', res);
const sessionId = transport.sessionId;
this.sseTransports.set(sessionId, transport);
logger.info({ sessionId }, 'SSE transport created');
const perSessionMcp = this.createSessionMcpServer();
this.sseSessionServers.set(sessionId, perSessionMcp);
// Clean up when the client disconnects.
res.on('close', () => {
logger.info({ sessionId }, 'SSE client disconnected');
this.cleanupSseSession(sessionId);
});
void perSessionMcp.connect(transport).catch((err: unknown) => {
logger.error({ err, sessionId }, 'Failed to connect SSE transport to MCP server');
this.cleanupSseSession(sessionId);
});
});
// POST /messages -- receive JSON-RPC messages for an existing session
this.app.post('/messages', (req, res) => {
const sessionId = req.query['sessionId'] as string | undefined;
if (!sessionId) {
res.status(400).json({ error: 'Missing sessionId query parameter' });
return;
}
const transport = this.sseTransports.get(sessionId);
if (!transport) {
if (this.streamableTransports.has(sessionId)) {
this.sendProtocolMismatch(
res,
'Bad Request: Session exists but uses Streamable HTTP transport protocol',
);
return;
}
res.status(404).json({ error: 'Unknown or expired session' });
return;
}
// Delegate to the transport; it will parse the body and route the
// JSON-RPC message to the MCP server.
void transport.handlePostMessage(req, res).catch((err: unknown) => {
logger.error({ err, sessionId }, 'Error handling POST /messages');
if (!res.headersSent) {
res.status(500).json({ error: 'Internal server error' });
}
});
});
}
// -- Private: Health endpoint ---------------------------------------------
private setupHealthEndpoint(): void {
this.app.get('/health', (_req, res) => {
void this.buildHealthResponse()
.then((body) => {
const status = body.healthy ? 200 : 503;
res.status(status).json(body);
})
.catch((err: unknown) => {
logger.error({ err }, 'Health check failed unexpectedly');
res.status(500).json({ healthy: false, error: 'Health check error' });
});
});
}
private async buildHealthResponse(): Promise<Record<string, unknown>> {
// Memory usage
const mem = process.memoryUsage();
const memoryMb = {
rss: Math.round(mem.rss / 1024 / 1024),
heapUsed: Math.round(mem.heapUsed / 1024 / 1024),
heapTotal: Math.round(mem.heapTotal / 1024 / 1024),
external: Math.round(mem.external / 1024 / 1024),
};
// Active MCP sessions (streamable + deprecated SSE)
const activeSessions = this.streamableTransports.size + this.sseTransports.size;
// Plugin health checks
const pluginHealth: Record<string, { healthy: boolean; message?: string }> = {};
let allPluginsHealthy = true;
for (const plugin of this.plugins) {
if (plugin.healthCheck) {
try {
const result = await plugin.healthCheck();
pluginHealth[plugin.name] = result;
if (!result.healthy) {
allPluginsHealthy = false;
}
} catch (err: unknown) {
const message = err instanceof Error ? err.message : String(err);
pluginHealth[plugin.name] = { healthy: false, message };
allPluginsHealthy = false;
}
} else {
pluginHealth[plugin.name] = { healthy: true };
}
}
const healthy = allPluginsHealthy && !this.shuttingDown;
return {
healthy,
version: PACKAGE_VERSION,
uptime: Math.round(process.uptime()),
shuttingDown: this.shuttingDown,
activeSessions,
plugins: pluginHealth,
memory: memoryMb,
};
}
private createSessionMcpServer(): McpServer {
const sessionServer = new McpServer(
{ name: 'social-mcp', version: PACKAGE_VERSION },
);
for (const plugin of this.plugins) {
plugin.registerTools(sessionServer, browserManager);
}
return sessionServer;
}
private cleanupSseSession(sessionId: string): void {
this.sseTransports.delete(sessionId);
const sessionServer = this.sseSessionServers.get(sessionId);
this.sseSessionServers.delete(sessionId);
if (!sessionServer) return;
void sessionServer.close().catch((err: unknown) => {
logger.warn({ err, sessionId }, 'Error closing SSE MCP server');
});
}
private cleanupStreamableSession(sessionId: string): void {
this.streamableTransports.delete(sessionId);
const sessionServer = this.streamableSessionServers.get(sessionId);
this.streamableSessionServers.delete(sessionId);
if (!sessionServer) return;
void sessionServer.close().catch((err: unknown) => {
logger.warn({ err, sessionId }, 'Error closing Streamable HTTP MCP server');
});
}
private sendProtocolMismatch(res: express.Response, message: string): void {
res.status(400).json({
jsonrpc: '2.0',
error: {
code: -32000,
message,
},
id: null,
});
}
private getFirstHeaderValue(value: string | string[] | undefined): string | undefined {
if (!value) return undefined;
return Array.isArray(value) ? value[0] : value;
}
}
+71
View File
@@ -0,0 +1,71 @@
import { browserManager } from '../browser/manager.js';
import { AppServer, type PlatformPlugin } from '../server/app.js';
import { logger } from '../utils/logger.js';
export function startServerWithPlugins(plugins: PlatformPlugin[]): void {
const appServer = new AppServer();
for (const plugin of plugins) {
appServer.registerPlugin(plugin);
}
appServer.start().catch((err: unknown) => {
logger.fatal({ err }, 'Failed to start server');
process.exit(1);
});
let shuttingDown = false;
async function gracefulShutdown(signal: string): Promise<void> {
if (shuttingDown) return;
shuttingDown = true;
logger.info({ signal }, 'Received shutdown signal — starting graceful shutdown');
const forceExitTimer = setTimeout(() => {
logger.fatal('Graceful shutdown timed out after 45s — forcing exit');
process.exit(1);
}, 45_000);
if (typeof forceExitTimer === 'object' && 'unref' in forceExitTimer) {
forceExitTimer.unref();
}
try {
logger.info('Shutdown step 1/5: draining browser queues');
await Promise.race([
browserManager.drain(),
new Promise<void>((resolve) => setTimeout(resolve, 30_000).unref()),
]);
logger.info('Shutdown step 2/5: closing browser');
await browserManager.close();
logger.info('Shutdown step 3/5: closing HTTP server');
await appServer.close();
logger.info('Shutdown step 4/5: flushing logger');
logger.flush();
logger.info('Shutdown step 5/5: exiting');
process.exit(0);
} catch (err: unknown) {
logger.fatal({ err }, 'Error during graceful shutdown');
process.exit(1);
}
}
process.on('SIGINT', () => void gracefulShutdown('SIGINT'));
process.on('SIGTERM', () => void gracefulShutdown('SIGTERM'));
process.on('unhandledRejection', (reason: unknown) => {
logger.fatal({ err: reason }, 'Unhandled promise rejection');
void gracefulShutdown('unhandledRejection');
});
process.on('uncaughtException', (err: Error) => {
logger.fatal({ err }, 'Uncaught exception');
void gracefulShutdown('uncaughtException');
});
}
+245
View File
@@ -0,0 +1,245 @@
import crypto from 'node:crypto';
import fs from 'node:fs';
import path from 'node:path';
import type { Request, Response, NextFunction } from 'express';
import { config } from '../config/index.js';
import { logger } from '../utils/logger.js';
import { sanitizeErrorMessage } from '../utils/errors.js';
// ---------------------------------------------------------------------------
// Allowed hosts for DNS rebinding protection
// ---------------------------------------------------------------------------
// DNS rebinding guard removed — Bearer token auth is sufficient
// ---------------------------------------------------------------------------
// 2. Shutdown Guard (factory)
// ---------------------------------------------------------------------------
/**
* Factory that returns middleware rejecting new requests once the server has
* started its graceful shutdown sequence.
*
* @param getShuttingDown - Callback that returns `true` when shutdown is in progress.
*/
export function shutdownGuard(
getShuttingDown: () => boolean,
): (req: Request, res: Response, next: NextFunction) => void {
return (_req: Request, res: Response, next: NextFunction): void => {
if (getShuttingDown()) {
res.status(503).json({ error: 'Server is shutting down' });
return;
}
next();
};
}
// ---------------------------------------------------------------------------
// 3. Error Handler
// ---------------------------------------------------------------------------
/**
* Express error-handling middleware (four-argument signature).
*
* Logs the full error internally while returning a sanitized message to the
* client so that internal filesystem paths, tokens, and stack traces are
* never exposed.
*/
export function errorHandler(
err: Error,
req: Request,
res: Response,
_next: NextFunction,
): void {
logger.error(
{ err, method: req.method, url: req.originalUrl },
'Unhandled error in request pipeline',
);
const message = sanitizeErrorMessage(err.message || 'Internal server error');
res.status(500).json({ error: message });
}
// ---------------------------------------------------------------------------
// 4. Bearer Token Authentication
// ---------------------------------------------------------------------------
const TOKEN_FILENAME = '.api-token';
/** Cached token once loaded/generated. */
let cachedToken: string | null = null;
/**
* Load or generate the Bearer API token.
*
* - On first start, generates a random 32-byte hex token.
* - Stores it at `config.cookieDir/.api-token` with 0o600 permissions.
* - On subsequent starts, reads the existing token from disk.
* - Logs the token to console so the user can copy it.
*
* Must be called once during server startup.
*/
export function initBearerToken(): string {
if (cachedToken) return cachedToken;
const tokenPath = path.join(config.cookieDir, TOKEN_FILENAME);
// Ensure the directory exists.
try {
fs.mkdirSync(config.cookieDir, { recursive: true, mode: 0o700 });
} catch {
// Directory may already exist.
}
// Try to read an existing token.
try {
const existing = fs.readFileSync(tokenPath, 'utf-8').trim();
if (existing.length >= 32) {
cachedToken = existing;
logger.info('API Bearer token loaded from disk');
// eslint-disable-next-line no-console
console.log(`\n REST API Bearer Token: ${cachedToken}\n`);
return cachedToken;
}
} catch {
// File does not exist or is unreadable — generate a new token.
}
// Generate a new token.
cachedToken = crypto.randomBytes(32).toString('hex');
fs.writeFileSync(tokenPath, cachedToken + '\n', { mode: 0o600 });
logger.info('New API Bearer token generated and saved');
// eslint-disable-next-line no-console
console.log(`\n REST API Bearer Token: ${cachedToken}\n`);
return cachedToken;
}
/**
* Express middleware that validates a `Bearer <token>` header against the
* stored API token. Uses `crypto.timingSafeEqual` to prevent timing attacks.
*
* Apply to `/api/*` routes only.
*/
export function bearerAuth(
req: Request,
res: Response,
next: NextFunction,
): void {
const authHeader = req.headers.authorization;
if (!authHeader || !authHeader.startsWith('Bearer ')) {
res.status(401).json({
success: false,
error: { code: 'UNAUTHORIZED', message: 'Missing or invalid Authorization header' },
});
return;
}
const provided = authHeader.slice(7); // Strip "Bearer "
if (!cachedToken) {
res.status(500).json({
success: false,
error: { code: 'INTERNAL', message: 'API token not initialized' },
});
return;
}
// Use timing-safe comparison to prevent timing attacks.
const providedBuf = Buffer.from(provided, 'utf-8');
const expectedBuf = Buffer.from(cachedToken, 'utf-8');
if (
providedBuf.length !== expectedBuf.length ||
!crypto.timingSafeEqual(providedBuf, expectedBuf)
) {
res.status(403).json({
success: false,
error: { code: 'FORBIDDEN', message: 'Invalid Bearer token' },
});
return;
}
next();
}
// ---------------------------------------------------------------------------
// 5. Rate Limiter (in-memory, per-IP)
// ---------------------------------------------------------------------------
interface RateLimiterOptions {
/** Time window in milliseconds. */
windowMs: number;
/** Maximum number of requests allowed in the window. */
maxRequests: number;
}
interface RateLimiterEntry {
/** Request timestamps within the current window. */
timestamps: number[];
}
/**
* Create an in-memory per-IP rate limiter middleware.
*
* Returns 429 when the rate limit is exceeded. Old entries are automatically
* cleaned up every 60 seconds to prevent memory leaks.
*/
export function rateLimiter(opts: RateLimiterOptions) {
const store = new Map<string, RateLimiterEntry>();
// Periodic cleanup of stale entries.
const cleanupInterval = setInterval(() => {
const now = Date.now();
for (const [ip, entry] of store) {
entry.timestamps = entry.timestamps.filter((t) => now - t < opts.windowMs);
if (entry.timestamps.length === 0) {
store.delete(ip);
}
}
}, 60_000);
// Do not let the cleanup timer keep the process alive during shutdown.
if (typeof cleanupInterval === 'object' && 'unref' in cleanupInterval) {
cleanupInterval.unref();
}
return (req: Request, res: Response, next: NextFunction): void => {
const ip = req.ip ?? req.socket.remoteAddress ?? 'unknown';
const now = Date.now();
let entry = store.get(ip);
if (!entry) {
entry = { timestamps: [] };
store.set(ip, entry);
}
// Remove timestamps outside the current window.
entry.timestamps = entry.timestamps.filter((t) => now - t < opts.windowMs);
if (entry.timestamps.length >= opts.maxRequests) {
const retryAfterMs = opts.windowMs - (now - (entry.timestamps[0] ?? now));
const retryAfterSec = Math.ceil(retryAfterMs / 1000);
res.set('Retry-After', String(retryAfterSec));
res.status(429).json({
success: false,
error: {
code: 'RATE_LIMITED',
message: `Too many requests. Try again in ${String(retryAfterSec)} seconds.`,
},
});
return;
}
entry.timestamps.push(now);
next();
};
}
+281
View File
@@ -0,0 +1,281 @@
import { open, stat, unlink, writeFile, mkdir } from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { randomUUID } from "node:crypto";
import { logger } from "./logger.js";
// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------
const DEFAULT_MAX_SIZE_MB = 20;
const BYTES_PER_MB = 1024 * 1024;
/** Minimum bytes we need to read to identify all supported formats. */
const MAGIC_BYTES_LEN = 12;
/**
* Map Content-Type values to file extensions. Used as a fallback when the
* URL does not contain a recognisable extension.
*/
const MIME_TO_EXT: Record<string, string> = {
"image/jpeg": ".jpg",
"image/png": ".png",
"image/webp": ".webp",
"video/mp4": ".mp4",
};
// ---------------------------------------------------------------------------
// Magic-byte detection
// ---------------------------------------------------------------------------
function detectMimeType(header: Buffer): string | undefined {
// JPEG: starts with FF D8 FF
if (header[0] === 0xff && header[1] === 0xd8 && header[2] === 0xff) {
return "image/jpeg";
}
// PNG: starts with 89 50 4E 47
if (
header[0] === 0x89 &&
header[1] === 0x50 &&
header[2] === 0x4e &&
header[3] === 0x47
) {
return "image/png";
}
// WebP: RIFF....WEBP (bytes 0-3 = "RIFF", bytes 8-11 = "WEBP")
if (
header[0] === 0x52 &&
header[1] === 0x49 &&
header[2] === 0x46 &&
header[3] === 0x46 &&
header[8] === 0x57 &&
header[9] === 0x45 &&
header[10] === 0x42 &&
header[11] === 0x50
) {
return "image/webp";
}
// MP4: "ftyp" at byte offset 4
if (
header[4] === 0x66 &&
header[5] === 0x74 &&
header[6] === 0x79 &&
header[7] === 0x70
) {
return "video/mp4";
}
return undefined;
}
// ---------------------------------------------------------------------------
// validateMediaPath
// ---------------------------------------------------------------------------
/**
* Validate that a local media file exists, is within size limits, and is one
* of the supported media types (JPEG, PNG, WebP, MP4).
*
* @returns The resolved absolute path to the file.
*/
export async function validateMediaPath(
filePath: string,
opts?: { maxSizeMB?: number },
): Promise<string> {
const resolved = path.resolve(filePath);
// Guard against path traversal -- reject if the *original* input tries to
// escape via ".." segments. We check the raw input rather than the resolved
// path so that a legitimate directory named ".." is not silently accepted.
if (filePath.includes("..")) {
throw new Error(
`Path traversal detected: the path must not contain ".." segments`,
);
}
// Existence & size check
let stats: Awaited<ReturnType<typeof stat>>;
try {
stats = await stat(resolved);
} catch {
throw new Error(`File not found: ${resolved}`);
}
if (!stats.isFile()) {
throw new Error(`Not a regular file: ${resolved}`);
}
const maxBytes = (opts?.maxSizeMB ?? DEFAULT_MAX_SIZE_MB) * BYTES_PER_MB;
if (stats.size > maxBytes) {
const sizeMB = (stats.size / BYTES_PER_MB).toFixed(2);
const limitMB = (maxBytes / BYTES_PER_MB).toFixed(0);
throw new Error(
`File too large: ${sizeMB} MB exceeds the ${limitMB} MB limit`,
);
}
if (stats.size < MAGIC_BYTES_LEN) {
throw new Error(`File too small to identify media type (${stats.size} bytes)`);
}
// MIME type check via magic bytes
const fd = await open(resolved, "r");
try {
const buf = Buffer.alloc(MAGIC_BYTES_LEN);
await fd.read(buf, 0, MAGIC_BYTES_LEN, 0);
const mime = detectMimeType(buf);
if (mime === undefined) {
throw new Error(
`Unsupported media type for file: ${resolved}. ` +
`Supported types: JPEG, PNG, WebP, MP4`,
);
}
logger.debug({ path: resolved, mime, bytes: stats.size }, "媒体文件校验通过");
} finally {
await fd.close();
}
return resolved;
}
// ---------------------------------------------------------------------------
// downloadFile
// ---------------------------------------------------------------------------
/**
* Derive a file extension from the URL path or the Content-Type header.
* Falls back to an empty string when neither source yields a known extension.
*/
function deriveExtension(url: string, contentType: string | null): string {
// Try to pull an extension from the URL pathname first.
try {
const pathname = new URL(url).pathname;
const ext = path.extname(pathname).toLowerCase();
if (ext && ext.length <= 5) {
return ext;
}
} catch {
// URL parsing failed -- fall through to Content-Type.
}
// Fall back to Content-Type header.
if (contentType) {
const baseMime = contentType.split(";")[0]?.trim().toLowerCase();
if (baseMime) {
const ext = MIME_TO_EXT[baseMime];
if (ext) {
return ext;
}
}
}
return "";
}
/**
* Download a file from a remote URL and save it into `destDir` with a random
* filename.
*
* @returns The absolute path to the downloaded file.
*/
export async function downloadFile(
url: string,
destDir: string,
): Promise<string> {
const resolvedDir = path.resolve(destDir);
// Ensure destination directory exists (recursive in case parents are missing).
await mkdir(resolvedDir, { recursive: true });
logger.debug({ url, destDir: resolvedDir }, "开始下载文件");
const response = await fetch(url);
if (!response.ok) {
throw new Error(
`Download failed: HTTP ${String(response.status)} ${response.statusText} for ${url}`,
);
}
const contentType = response.headers.get("content-type");
const ext = deriveExtension(url, contentType);
const filename = `${randomUUID()}${ext}`;
const destPath = path.join(resolvedDir, filename);
const arrayBuffer = await response.arrayBuffer();
const buffer = Buffer.from(arrayBuffer);
await writeFile(destPath, buffer, { mode: 0o600 });
logger.debug(
{ path: destPath, bytes: buffer.length, mime: contentType },
"文件下载完成",
);
return destPath;
}
// ---------------------------------------------------------------------------
// cleanupFile
// ---------------------------------------------------------------------------
// ---------------------------------------------------------------------------
// resolveMediaInput
// ---------------------------------------------------------------------------
/**
* Resolve a media input that is either a local file path or a remote URL.
*
* - If `input` is an HTTP/HTTPS URL, the file is downloaded to a temporary
* directory, validated, and returned with `temporary: true` so the caller
* can clean it up after use.
* - Otherwise the path is validated in-place and returned with `temporary: false`.
*
* @returns `{ path, temporary }` where `path` is the local file path ready for use.
*/
export async function resolveMediaInput(
input: string,
opts?: { maxSizeMB?: number; tempDir?: string },
): Promise<{ path: string; temporary: boolean }> {
if (input.startsWith("http://") || input.startsWith("https://")) {
const dir = opts?.tempDir ?? path.join(os.tmpdir(), "social-mcp");
const downloaded = await downloadFile(input, dir);
try {
await validateMediaPath(downloaded, { maxSizeMB: opts?.maxSizeMB });
} catch (err) {
await cleanupFile(downloaded).catch(() => undefined);
throw err;
}
return { path: downloaded, temporary: true };
}
const validated = await validateMediaPath(input, { maxSizeMB: opts?.maxSizeMB });
return { path: validated, temporary: false };
}
// ---------------------------------------------------------------------------
// cleanupFile
// ---------------------------------------------------------------------------
/**
* Delete a local file. Silently succeeds if the file does not exist.
*/
export async function cleanupFile(filePath: string): Promise<void> {
try {
await unlink(filePath);
logger.debug({ path: filePath }, "临时文件已清理");
} catch (err: unknown) {
// ENOENT means the file was already gone -- that is fine.
if (err instanceof Error && "code" in err && err.code === "ENOENT") {
return;
}
throw err;
}
}
+152
View File
@@ -0,0 +1,152 @@
import { logger } from './logger.js';
// ---------------------------------------------------------------------------
// Error classification
// ---------------------------------------------------------------------------
export enum ErrorCategory {
TIMEOUT = 'TIMEOUT',
AUTH_REQUIRED = 'AUTH_REQUIRED',
CAPTCHA_REQUIRED = 'CAPTCHA_REQUIRED',
SELECTOR_NOT_FOUND = 'SELECTOR_NOT_FOUND',
NETWORK = 'NETWORK',
PLATFORM_ERROR = 'PLATFORM_ERROR',
INTERNAL = 'INTERNAL',
}
/**
* Inspect an Error's `message` and `name` to determine which category it
* belongs to. The checks are intentionally broad so that errors surfaced by
* Playwright, Puppeteer, or native fetch all get classified correctly.
*/
export function classifyError(err: Error): ErrorCategory {
const haystack = `${err.name} ${err.message}`.toLowerCase();
if (
haystack.includes('captcha') ||
haystack.includes('show_captcha') ||
haystack.includes('验证码')
) {
return ErrorCategory.CAPTCHA_REQUIRED;
}
// Selector check BEFORE timeout — Playwright's selector timeout message
// is "Timeout waiting for selector ..." which contains both keywords.
// The more specific match must come first.
if (
haystack.includes('waiting for selector') ||
haystack.includes('找不到元素')
) {
return ErrorCategory.SELECTOR_NOT_FOUND;
}
if (haystack.includes('timeout') || err.name === 'TimeoutError') {
return ErrorCategory.TIMEOUT;
}
if (
haystack.includes('net::err_') ||
haystack.includes('platform_error') ||
haystack.includes('平台错误')
) {
return ErrorCategory.PLATFORM_ERROR;
}
if (haystack.includes('login') || haystack.includes('登录')) {
return ErrorCategory.AUTH_REQUIRED;
}
return ErrorCategory.INTERNAL;
}
// ---------------------------------------------------------------------------
// Message sanitization
// ---------------------------------------------------------------------------
/**
* Strip potentially sensitive or noisy information from an error message
* before it is returned to the MCP client.
*
* - File-system paths (/xxx/yyy/...) -> [path]
* - URLs (http(s)://...) -> [url]
* - Long hex strings (>= 32 chars) -> [hash]
* - Truncated to 200 characters
*/
export function sanitizeErrorMessage(message: string): string {
let sanitized = message;
// Replace URLs first so that the path regex does not partially match them.
sanitized = sanitized.replace(/https?:\/\/[^\s)'"]+/g, '[url]');
// Replace absolute file-system paths (Unix-style).
sanitized = sanitized.replace(/\/(?:[^\s/]+\/)+[^\s/)'":]*/g, '[path]');
// Replace long hexadecimal strings (session ids, hashes, tokens, etc.).
sanitized = sanitized.replace(/[0-9a-fA-F]{32,}/g, '[hash]');
// Truncate to 200 characters.
if (sanitized.length > 200) {
sanitized = sanitized.slice(0, 200);
}
return sanitized;
}
// ---------------------------------------------------------------------------
// MCP tool result type
// ---------------------------------------------------------------------------
export type McpToolResult = {
content: Array<{ type: 'text'; text: string }>;
isError?: boolean;
};
// ---------------------------------------------------------------------------
// Error-handling wrapper
// ---------------------------------------------------------------------------
/**
* Execute an MCP tool handler inside a try/catch that automatically
* classifies, sanitizes, and logs any thrown error before returning a
* well-structured MCP error response.
*
* Usage:
* ```ts
* const result = await withErrorHandling('publish_post', async () => {
* // ... tool logic that returns McpToolResult
* });
* ```
*/
export async function withErrorHandling(
toolName: string,
fn: () => Promise<McpToolResult>,
): Promise<McpToolResult> {
try {
return await fn();
} catch (caught: unknown) {
const err =
caught instanceof Error ? caught : new Error(String(caught));
const category = classifyError(err);
const sanitized = sanitizeErrorMessage(err.message);
logger.error(
{ tool: toolName, category, err },
'工具执行失败',
);
const payload = JSON.stringify({
success: false,
error: {
tool: toolName,
code: category,
message: sanitized,
},
});
return {
content: [{ type: 'text', text: payload }],
isError: true,
};
}
}
+115
View File
@@ -0,0 +1,115 @@
import crypto from 'node:crypto';
import fs from 'node:fs';
import path from 'node:path';
import { config } from '../config/index.js';
import { DatabaseSync } from './sqlite.js';
interface IdempotencyRow {
tool_name: string;
request_id: string;
input_hash: string;
response_json: string;
created_at: number;
}
export interface IdempotencyRecord {
toolName: string;
requestId: string;
inputHash: string;
responseData: unknown;
createdAt: string;
}
const DB_FILENAME = 'idempotency.db';
function canonicalize(value: unknown): unknown {
if (Array.isArray(value)) {
return value.map(canonicalize);
}
if (value && typeof value === 'object') {
const input = value as Record<string, unknown>;
const out: Record<string, unknown> = {};
for (const key of Object.keys(input).sort()) {
out[key] = canonicalize(input[key]);
}
return out;
}
return value;
}
export function computeIdempotencyHash(input: unknown): string {
const canonical = canonicalize(input);
return crypto
.createHash('sha256')
.update(JSON.stringify(canonical))
.digest('hex');
}
export class IdempotencyStore {
private readonly db: InstanceType<typeof DatabaseSync>;
constructor(baseDir = config.cookieDir, dbFilename = DB_FILENAME) {
fs.mkdirSync(baseDir, { recursive: true, mode: 0o700 });
const dbPath = path.join(baseDir, dbFilename);
this.db = new DatabaseSync(dbPath);
this.db.exec(`
CREATE TABLE IF NOT EXISTS idempotency_requests (
tool_name TEXT NOT NULL,
request_id TEXT NOT NULL,
input_hash TEXT NOT NULL,
response_json TEXT NOT NULL,
created_at INTEGER NOT NULL,
PRIMARY KEY (tool_name, request_id)
);
`);
}
get(toolName: string, requestId: string): IdempotencyRecord | null {
const stmt = this.db.prepare(`
SELECT tool_name, request_id, input_hash, response_json, created_at
FROM idempotency_requests
WHERE tool_name = ? AND request_id = ?
LIMIT 1
`);
const row = stmt.get(toolName, requestId) as IdempotencyRow | undefined;
if (!row) return null;
return {
toolName: row.tool_name,
requestId: row.request_id,
inputHash: row.input_hash,
responseData: JSON.parse(row.response_json),
createdAt: new Date(row.created_at).toISOString(),
};
}
put(
toolName: string,
requestId: string,
inputHash: string,
responseData: unknown,
): void {
const stmt = this.db.prepare(`
INSERT OR REPLACE INTO idempotency_requests (
tool_name, request_id, input_hash, response_json, created_at
) VALUES (?, ?, ?, ?, ?)
`);
stmt.run(
toolName,
requestId,
inputHash,
JSON.stringify(responseData),
Date.now(),
);
}
}
let singleton: IdempotencyStore | null = null;
export function getIdempotencyStore(): IdempotencyStore {
if (!singleton) {
singleton = new IdempotencyStore();
}
return singleton;
}
+68
View File
@@ -0,0 +1,68 @@
import pino from "pino";
const isProduction = process.env["NODE_ENV"] === "production";
// In production, suppress Playwright debug output that bypasses pino.
if (isProduction) {
delete process.env["DEBUG"];
}
const redactPaths: string[] = [
// Auth & credentials
"**.cookie",
"**.cookies",
"**.set-cookie",
"**.authorization",
"**.password",
"**.secret",
// Tokens
"**.token",
"**.xsec_token",
"**.access_token",
"**.refresh_token",
// API keys
"**.api_key",
"**.apikey",
// Sessions
"**.sessionid",
"**.session_id",
// Playwright StorageState structures
"**.cookies[*].value",
"**.origins[*].localStorage[*].value",
];
const errorSerializer = (err: Error): Record<string, unknown> => {
const serialized: Record<string, unknown> = {
type: err.constructor?.name ?? "Error",
message: err.message,
};
if (!isProduction && err.stack) {
serialized["stack"] = err.stack;
}
return serialized;
};
export const logger: pino.Logger = pino({
level: process.env["LOG_LEVEL"] ?? "info",
redact: {
paths: redactPaths,
censor: "[REDACTED]",
},
serializers: {
err: errorSerializer,
error: errorSerializer,
},
...(isProduction
? {}
: {
transport: {
target: "pino-pretty",
},
}),
});
+10
View File
@@ -0,0 +1,10 @@
import { createRequire } from 'node:module';
import type { DatabaseSync as DatabaseSyncType } from 'node:sqlite';
const nodeRequire = createRequire(import.meta.url);
const loaded = nodeRequire('node:sqlite') as {
DatabaseSync: typeof DatabaseSyncType;
};
export const DatabaseSync = loaded.DatabaseSync;