From 8b39520ec7052776c83f55f42491e9f543814f45 Mon Sep 17 00:00:00 2001 From: kurihada Date: Sun, 1 Mar 2026 17:59:39 +0800 Subject: [PATCH] =?UTF-8?q?fix(xhs):=20=E7=99=BB=E5=BD=95=E4=BA=8C?= =?UTF-8?q?=E7=BB=B4=E7=A0=81=E6=B5=81=E7=A8=8B=E6=94=B9=E7=94=A8=E9=9D=9E?= =?UTF-8?q?=E6=97=A0=E5=A4=B4=E6=B5=8F=E8=A7=88=E5=99=A8=E7=BB=95=E8=BF=87?= =?UTF-8?q?IP=E9=A3=8E=E6=8E=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit XHS对未登录的无头浏览器请求触发风控(code 300012: IP存在风险), 重定向到安全限制页导致二维码选择器永远匹配不到。 - getLoginQRCode 单独启动 headless:false 浏览器,登录弹框可正常加载 - 扫码成功后直接从登录 context 提取 storageState 保存到磁盘 - 调用 browser.clearContext() 让主 BrowserManager 下次重新加载新 cookies - waitForLoginAndRelease 改为接收 BrowserContext 并关闭登录浏览器 - 修正 qrCodeImage 选择器:'.login-container .qrcode-img' → 'img.qrcode-img' --- src/platforms/xiaohongshu/login.ts | 108 ++++++++++++++++++------- src/platforms/xiaohongshu/selectors.ts | 4 +- 2 files changed, 83 insertions(+), 29 deletions(-) diff --git a/src/platforms/xiaohongshu/login.ts b/src/platforms/xiaohongshu/login.ts index 3c94a5e..f90b89a 100644 --- a/src/platforms/xiaohongshu/login.ts +++ b/src/platforms/xiaohongshu/login.ts @@ -1,6 +1,8 @@ -import type { Page } from 'rebrowser-playwright'; +import { chromium } from 'rebrowser-playwright'; +import type { Page, BrowserContext } from 'rebrowser-playwright'; import type { BrowserManager } from '../../browser/manager.js'; +import { config } from '../../config/index.js'; import { logger } from '../../utils/logger.js'; import { cookieStore } from '../../cookie/store.js'; import { XHS_SELECTORS } from './selectors.js'; @@ -16,6 +18,9 @@ const EXPLORE_URL = 'https://www.xiaohongshu.com/explore'; /** How long to wait for the user to scan the QR code (4 minutes). */ const QR_SCAN_TIMEOUT_MS = 4 * 60 * 1000; +/** Safety timeout for the login browser window (5 minutes). */ +const LOGIN_BROWSER_SAFETY_MS = 5 * 60 * 1000; + const log = logger.child({ module: 'xhs-login' }); // --------------------------------------------------------------------------- @@ -55,26 +60,68 @@ export async function checkLoginStatus(page: Page): Promise { // --------------------------------------------------------------------------- /** - * Open the explore page, trigger the login modal if needed, and extract the - * QR code image data. + * Open the explore page in a **non-headless** browser, wait for the login + * modal QR code, and return the image data. * - * Because the user must scan the QR code with their phone (which takes an - * indeterminate amount of time), this function uses `acquirePage` instead of - * `withPage`. A fire-and-forget background task waits for the scan to - * complete, saves cookies, and releases the page. + * Why non-headless? XHS detects unauthenticated headless requests and + * redirects them to a security-restriction error page (code 300012: + * "IP存在风险"). Once the user scans the QR code and cookies are saved, + * all subsequent operations can use the normal headless BrowserManager. * - * @param browser - The shared BrowserManager instance. + * A fire-and-forget background task waits for the scan to complete, saves + * cookies to disk, clears the main BrowserManager context (so it reloads + * the fresh cookies on next use), then closes the login browser. + * + * @param browser - The shared BrowserManager instance (used to clear its + * cached context after login so it picks up new cookies). * @returns QR code data or an indication that the user is already logged in. */ export async function getLoginQRCode( browser: BrowserManager, ): Promise { - const { page, release } = await browser.acquirePage(PLATFORM); + log.info('Launching non-headless browser for QR code login'); + + const loginBrowser = await chromium.launch({ + headless: false, + ...(config.browserBin ? { executablePath: config.browserBin } : {}), + args: [ + '--no-sandbox', + '--disable-setuid-sandbox', + '--disable-dev-shm-usage', + '--disable-gpu', + ], + }); + + const ctx = await loginBrowser.newContext(); + const page = await ctx.newPage(); + + let released = false; + + const release = async (): Promise => { + if (released) return; + released = true; + clearTimeout(safetyTimer); + await loginBrowser.close().catch((err: unknown) => { + log.warn({ err }, 'Failed to close login browser'); + }); + }; + + const safetyTimer = setTimeout(() => { + if (!released) { + log.warn('Login browser safety timeout: closing after 5 minutes'); + void release(); + } + }, LOGIN_BROWSER_SAFETY_MS); + + if (typeof safetyTimer === 'object' && 'unref' in safetyTimer) { + safetyTimer.unref(); + } try { await page.goto(EXPLORE_URL, { waitUntil: 'domcontentloaded' }); + log.debug({ url: page.url() }, 'Login browser navigated'); - // Check whether the user is already logged in. + // Check whether the user is already logged in via saved cookies. const alreadyLoggedIn = await page .waitForSelector(XHS_SELECTORS.login.loggedInIndicator, { timeout: 3_000 }) .then(() => true) @@ -85,9 +132,10 @@ export async function getLoginQRCode( return { qrcodeData: '', alreadyLoggedIn: true, timeout: '0' }; } - // If the QR code is not visible yet, click the login button to open it. + // The login modal auto-appears after a few seconds (no button click needed). + // Wait up to 20s; if still absent try the login button as a fallback. const qrVisible = await page - .waitForSelector(XHS_SELECTORS.login.qrCodeImage, { timeout: 3_000 }) + .waitForSelector(XHS_SELECTORS.login.qrCodeImage, { timeout: 20_000 }) .then(() => true) .catch(() => false); @@ -114,11 +162,8 @@ export async function getLoginQRCode( throw new Error('QR code image src attribute is empty'); } - // Fire-and-forget: wait for the user to scan the QR code in the - // background. On success, save cookies and release the page. On - // failure or timeout, just release the page. The `.catch()` ensures - // no unhandled rejection escapes. - waitForLoginAndRelease(page, browser, release).catch((err: unknown) => { + // Fire-and-forget: wait for the user to scan the QR code in the background. + waitForLoginAndRelease(page, ctx, browser, release).catch((err: unknown) => { log.error({ err }, 'Login wait flow encountered an unexpected error'); }); @@ -128,8 +173,6 @@ export async function getLoginQRCode( timeout: '4m', }; } catch (err) { - // If anything goes wrong before we hand off to the background task, - // make sure the page is released. await release(); throw err; } @@ -155,15 +198,19 @@ export async function deleteCookies(browser: BrowserManager): Promise { /** * Background task that waits for the logged-in indicator to appear (meaning - * the user has scanned the QR code). On success it persists cookies. The - * page is released in all cases (success, timeout, error) via `finally`. + * the user has scanned the QR code). * - * `release` is idempotent (guaranteed by BrowserManager.acquirePage), so - * even if the safety-net timer inside acquirePage fires concurrently, there - * is no double-close. + * On success: + * 1. Extracts storageState from the login browser context. + * 2. Saves cookies to disk via CookieStore. + * 3. Clears the main BrowserManager's cached context so the next + * headless operation creates a fresh context that reloads the cookies. + * + * The login browser is closed in all cases (success, timeout, error). */ async function waitForLoginAndRelease( page: Page, + ctx: BrowserContext, browser: BrowserManager, release: () => Promise, ): Promise { @@ -173,10 +220,17 @@ async function waitForLoginAndRelease( }); log.info('QR code scanned — login detected, saving cookies'); - await browser.saveCookies(PLATFORM); + + const state = await ctx.storageState(); + await cookieStore.save(PLATFORM, state); + + // Clear the headless BrowserManager's cached context so the next + // withPage() call creates a new one that restores the saved cookies. + await browser.clearContext(PLATFORM); + + log.info('Cookies saved and headless context refreshed'); } catch { - // Timeout or page closed — not an error, just means the user did not - // scan in time (or the page was released by the safety timer). + // Timeout or browser closed — not an error. log.debug('Login wait ended without successful scan'); } finally { await release(); diff --git a/src/platforms/xiaohongshu/selectors.ts b/src/platforms/xiaohongshu/selectors.ts index a9feaa8..ddb0805 100644 --- a/src/platforms/xiaohongshu/selectors.ts +++ b/src/platforms/xiaohongshu/selectors.ts @@ -4,8 +4,8 @@ export const XHS_SELECTORS = { login: { - /** QR code image on the login modal / page. */ - qrCodeImage: '.login-container .qrcode-img', + /** QR code image on the login modal (auto-appears after a few seconds). */ + qrCodeImage: 'img.qrcode-img', /** Element present only when the user is logged in (sidebar channel link). */ loggedInIndicator: '.user .link-wrapper .channel', /** The "login" button that opens the QR code modal (if not already shown). */