import {
  chromium,
  type Browser,
  type BrowserContext,
  type Page,
} from 'rebrowser-playwright';
import { config } from '../config/index.js';
import { cookieStore } from '../cookie/store.js';
import { logger } from '../utils/logger.js';

// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------

/** How long acquirePage() waits before auto-releasing the page (5 minutes). */
const ACQUIRE_SAFETY_TIMEOUT_MS = 5 * 60_000;

// ---------------------------------------------------------------------------
// BrowserManager
// ---------------------------------------------------------------------------

/**
 * Owns a single Chromium instance and one BrowserContext per platform.
 *
 * Pages are handed out either through `withPage` (queued per platform, with a
 * hard timeout) or through `acquirePage` (caller-managed, not queued).
 */
export class BrowserManager {
  // -- State ----------------------------------------------------------------

  /** The currently tracked browser, or `null` when none is launched. */
  private browser: Browser | null = null;

  /** Established contexts, keyed by platform identifier. */
  private contexts = new Map<string, BrowserContext>();

  /**
   * Context creations that are currently in flight, keyed by platform.
   * Lets concurrent `getContext` callers share one creation instead of each
   * building (and all but one orphaning) their own context.
   */
  private pendingContexts = new Map<string, Promise<BrowserContext>>();

  /** Tail of the per-platform promise chain that serialises `withPage`. */
  private queues = new Map<string, Promise<void>>();

  /** Number of `withPage` tasks queued or running, per platform. */
  private queueDepths = new Map<string, number>();

  /**
   * Mutex-style promise that prevents concurrent browser launches.
   * While a launch is in-flight every caller awaits the same promise.
   */
  private launchPromise: Promise<Browser> | null = null;

  // -- Public API -----------------------------------------------------------

  /**
   * Execute `fn` on a fresh page inside the platform-specific context.
   *
   * Operations are serialised per-platform through a promise chain so that
   * `withPage` runs at most one callback per platform at any time.
   * Back-pressure is enforced via `config.maxQueueDepth`.
   *
   * Note that a timeout only rejects the returned promise and closes the
   * page; it cannot interrupt `fn` itself, which keeps running until its next
   * page operation fails.
   *
   * @param platform - Platform identifier (e.g. "twitter", "xiaohongshu").
   * @param fn - Async callback that receives the page.
   * @param timeoutMs - Optional timeout override (defaults to the
   *   `operationTimeouts.default` value from config, then to 60 seconds).
   * @returns The value returned by `fn`.
   * @throws Error if the platform queue is full, the browser disconnected
   *   while the task was queued, or the operation timed out; also rethrows
   *   whatever `fn` rejects with.
   */
  async withPage<T>(
    platform: string,
    fn: (page: Page) => Promise<T>,
    timeoutMs?: number,
  ): Promise<T> {
    // -- Back-pressure check ------------------------------------------------
    const currentDepth = this.queueDepths.get(platform) ?? 0;
    if (currentDepth >= config.maxQueueDepth) {
      throw new Error(
        `Queue full for platform "${platform}" (depth=${currentDepth}, ` +
          `max=${config.maxQueueDepth}). Try again later.`,
      );
    }
    this.queueDepths.set(platform, currentDepth + 1);

    // -- Resolve effective timeout ------------------------------------------
    const effectiveTimeout =
      timeoutMs ?? config.operationTimeouts['default'] ?? 60_000;

    // -- Build the task and chain onto the per-platform queue ---------------
    const previous = this.queues.get(platform) ?? Promise.resolve();

    const task: Promise<T> = previous.then(async () => {
      const browser = await this.ensureBrowser();

      // The browser may have disconnected while this task was queued.
      if (!browser.isConnected()) {
        throw new Error('Browser disconnected while waiting in queue');
      }

      const ctx = await this.getContext(platform);
      const page = await ctx.newPage();
      page.setDefaultTimeout(effectiveTimeout);
      page.setDefaultNavigationTimeout(effectiveTimeout);

      // Race the user function against a hard timeout. The timer is
      // cancelled in `finally` so that a fast `fn` does not leave a pending
      // timer (and its closure) alive for the full timeout duration.
      const timeout = this.createTimeout(effectiveTimeout, platform);
      try {
        return await Promise.race([fn(page), timeout.promise]);
      } finally {
        timeout.cancel();
        await page.close().catch((err: unknown) => {
          logger.warn({ err, platform }, 'Failed to close page');
        });
      }
    });

    // Swallow errors so the promise chain continues for the next caller.
    // The actual rejection is still returned to **this** caller via `task`.
    const chainContinuation = task.then(
      () => {},
      () => {},
    );

    // Decrement queue depth when this task settles, regardless of outcome.
    void chainContinuation.finally(() => {
      const depth = this.queueDepths.get(platform) ?? 1;
      if (depth <= 1) {
        this.queueDepths.delete(platform);
      } else {
        this.queueDepths.set(platform, depth - 1);
      }
    });

    this.queues.set(platform, chainContinuation);

    return task;
  }

  /**
   * Acquire a page that the caller manages manually (e.g. for interactive
   * login flows). The caller **must** call `release()` when finished.
   *
   * Unlike `withPage`, this does not go through the per-platform queue.
   *
   * A safety-net timer auto-releases the page after 5 minutes to prevent
   * resource leaks if the caller forgets.
   *
   * @param platform - Platform identifier.
   * @returns Object with `page` and an idempotent `release` function.
   */
  async acquirePage(
    platform: string,
  ): Promise<{ page: Page; release: () => Promise<void> }> {
    await this.ensureBrowser();
    const ctx = await this.getContext(platform);
    const page = await ctx.newPage();

    let released = false;

    // `safetyTimer` is declared below; `release` is only ever invoked after
    // that declaration has run, so the forward reference is safe.
    const release = async (): Promise<void> => {
      if (released) return;
      released = true;
      clearTimeout(safetyTimer);
      await page.close().catch((err: unknown) => {
        logger.warn({ err, platform }, 'Failed to close acquired page');
      });
    };

    const safetyTimer = setTimeout(() => {
      if (!released) {
        logger.warn(
          { platform },
          `acquirePage safety timeout: auto-releasing page after ${ACQUIRE_SAFETY_TIMEOUT_MS}ms`,
        );
        void release();
      }
    }, ACQUIRE_SAFETY_TIMEOUT_MS);

    // Prevent the timer from keeping the Node.js process alive.
    if (typeof safetyTimer === 'object' && 'unref' in safetyTimer) {
      safetyTimer.unref();
    }

    return { page, release };
  }

  /**
   * Save the current cookie / storage state of a platform's browser context
   * to disk via the CookieStore.
   *
   * Logs a warning and returns without saving when no context exists yet.
   *
   * @param platform - Platform identifier whose context should be persisted.
   */
  async saveCookies(platform: string): Promise<void> {
    const ctx = this.contexts.get(platform);
    if (!ctx) {
      logger.warn(
        { platform },
        'saveCookies called but no context exists for platform',
      );
      return;
    }

    const state = await ctx.storageState();
    // Playwright's storageState() return type is structurally compatible with
    // our CookieStore's StorageState interface.
    await cookieStore.save(platform, state);
    logger.debug({ platform }, 'Cookies saved');
  }

  /**
   * Wait for every in-flight platform queue to settle. Useful during
   * graceful shutdown so that running operations finish before teardown.
   *
   * Only the queue tails present when `drain()` is called are awaited.
   */
  async drain(): Promise<void> {
    const pending = [...this.queues.values()];
    if (pending.length === 0) return;

    logger.info(
      { queueCount: pending.length },
      'Draining browser operation queues',
    );
    await Promise.allSettled(pending);
    logger.info('All browser operation queues drained');
  }

  /**
   * Close all browser contexts and the browser itself, then reset internal
   * state. Safe to call multiple times.
   */
  async close(): Promise<void> {
    // Close every context individually so callers that need to flush
    // storageState can do so before calling close().
    const contextClosePromises = [...this.contexts.values()].map((ctx) =>
      ctx.close().catch((err: unknown) => {
        logger.warn({ err }, 'Error closing browser context during shutdown');
      }),
    );
    await Promise.all(contextClosePromises);

    // Detach the browser reference *before* closing it so the
    // 'disconnected' handler can tell this deliberate shutdown apart from a
    // crash and does not log a spurious "disconnected unexpectedly" error.
    const browser = this.browser;
    this.browser = null;
    if (browser) {
      await browser.close().catch((err: unknown) => {
        logger.warn({ err }, 'Error closing browser during shutdown');
      });
    }

    this.contexts.clear();
    this.pendingContexts.clear();
    this.queues.clear();
    this.queueDepths.clear();
    this.launchPromise = null;

    logger.info('BrowserManager closed');
  }

  // -- Private helpers ------------------------------------------------------

  /**
   * Ensure the browser is launched and connected. Uses a launch mutex so
   * that concurrent callers share a single launch attempt instead of
   * spawning multiple browser processes.
   *
   * @returns The connected browser instance.
   */
  private async ensureBrowser(): Promise<Browser> {
    if (this.browser?.isConnected()) {
      return this.browser;
    }

    // If another caller is already launching, piggy-back on that promise.
    if (this.launchPromise) {
      return this.launchPromise;
    }

    this.launchPromise = this.launchBrowser();

    try {
      return await this.launchPromise;
    } finally {
      // Cleared on success and failure alike so a failed launch can be
      // retried by the next caller.
      this.launchPromise = null;
    }
  }

  /**
   * Launch a Chromium instance via rebrowser-playwright and start tracking it.
   *
   * @returns The freshly launched browser.
   */
  private async launchBrowser(): Promise<Browser> {
    logger.info(
      { headless: config.headless, browserBin: config.browserBin ?? 'default' },
      'Launching browser',
    );

    const browser = await chromium.launch({
      headless: config.headless,
      ...(config.browserBin ? { executablePath: config.browserBin } : {}),
    });

    // React to unexpected disconnects (e.g. browser crash, OOM kill).
    browser.on('disconnected', () => {
      // Ignore the event when this browser is no longer the tracked one:
      // either close() detached it on purpose, or it has been replaced.
      // Resetting state here would otherwise discard a newer browser.
      if (this.browser !== browser) return;

      logger.error('Browser disconnected unexpectedly');
      this.browser = null;
      this.contexts.clear();
      // No relaunch is triggered here; the next ensureBrowser() call sees
      // `this.browser === null` and attempts a fresh launch.
    });

    this.browser = browser;
    logger.info('Browser launched successfully');
    return browser;
  }

  /**
   * Get (or lazily create) a BrowserContext for the given platform.
   *
   * Concurrent callers for the same platform share a single in-flight
   * creation, so exactly one context per platform is ever registered.
   *
   * @param platform - Platform identifier.
   * @returns The platform's browser context.
   * @throws Error if no browser is currently launched.
   */
  private async getContext(platform: string): Promise<BrowserContext> {
    const existing = this.contexts.get(platform);
    if (existing) return existing;

    const inFlight = this.pendingContexts.get(platform);
    if (inFlight) return inFlight;

    const creation = this.createContext(platform).finally(() => {
      // Only remove our own entry; close() may have cleared the map and a
      // later caller may have registered a newer creation in the meantime.
      if (this.pendingContexts.get(platform) === creation) {
        this.pendingContexts.delete(platform);
      }
    });
    this.pendingContexts.set(platform, creation);
    return creation;
  }

  /**
   * Create and register a new BrowserContext for `platform`.
   *
   * We attempt to restore cookies from the CookieStore first so that
   * sessions survive process restarts; a failed load falls back to a fresh
   * context.
   */
  private async createContext(platform: string): Promise<BrowserContext> {
    if (!this.browser) {
      throw new Error('Cannot create context: browser is not launched');
    }

    // Attempt to restore a previous session's storage state from disk.
    let storageState:
      | Awaited<ReturnType<typeof cookieStore.load>>
      | undefined;
    try {
      const loaded = await cookieStore.load(platform);
      if (loaded) {
        storageState = loaded;
        logger.debug(
          { platform, cookieCount: loaded.cookies.length },
          'Restoring saved cookies into new context',
        );
      }
    } catch (err: unknown) {
      // Cookie load failure should never prevent context creation.
      logger.warn(
        { err, platform },
        'Failed to load saved cookies -- creating fresh context',
      );
    }

    // Re-check: the browser may have gone away while cookies were loading.
    if (!this.browser) {
      throw new Error('Cannot create context: browser is not launched');
    }

    const ctx = await this.browser.newContext(
      storageState ? { storageState } : {},
    );

    this.contexts.set(platform, ctx);
    logger.debug({ platform }, 'Browser context created');
    return ctx;
  }

  /**
   * Create the timeout arm for `Promise.race` inside `withPage`.
   *
   * The timer is `unref()`'d so it cannot keep the Node.js event loop alive
   * during graceful shutdown, and `cancel()` clears it once the race is
   * decided so no timer outlives its operation.
   *
   * @param ms - Delay in milliseconds before the promise rejects.
   * @param platform - Platform identifier, used in the error message.
   * @returns `promise`, which rejects after `ms` milliseconds unless
   *   cancelled, and `cancel`, which clears the timer (idempotent).
   */
  private createTimeout(
    ms: number,
    platform: string,
  ): { promise: Promise<never>; cancel: () => void } {
    let timer: ReturnType<typeof setTimeout> | undefined;

    const promise = new Promise<never>((_resolve, reject) => {
      timer = setTimeout(() => {
        reject(
          new Error(
            `Operation timed out after ${ms}ms for platform "${platform}"`,
          ),
        );
      }, ms);

      // Prevent the timeout from keeping the process alive during shutdown.
      if (typeof timer === 'object' && 'unref' in timer) {
        timer.unref();
      }
    });

    const cancel = (): void => {
      if (timer !== undefined) {
        clearTimeout(timer);
        timer = undefined;
      }
    };

    return { promise, cancel };
  }
}

// ---------------------------------------------------------------------------
// Singleton export
// ---------------------------------------------------------------------------

export const browserManager = new BrowserManager();