fix(xhs): 登录二维码流程改用非无头浏览器绕过IP风控

XHS对未登录的无头浏览器请求触发风控(code 300012: IP存在风险),
重定向到安全限制页导致二维码选择器永远匹配不到。

- getLoginQRCode 单独启动 headless:false 浏览器,登录弹框可正常加载
- 扫码成功后直接从登录 context 提取 storageState 保存到磁盘
- 调用 browser.clearContext() 让主 BrowserManager 下次重新加载新 cookies
- waitForLoginAndRelease 改为接收 BrowserContext 并关闭登录浏览器
- 修正 qrCodeImage 选择器:'.login-container .qrcode-img' → 'img.qrcode-img'
This commit is contained in:
2026-03-01 17:59:39 +08:00
parent 838b244929
commit 8b39520ec7
2 changed files with 83 additions and 29 deletions
+81 -27
View File
@@ -1,6 +1,8 @@
import type { Page } from 'rebrowser-playwright';
import { chromium } from 'rebrowser-playwright';
import type { Page, BrowserContext } from 'rebrowser-playwright';
import type { BrowserManager } from '../../browser/manager.js';
import { config } from '../../config/index.js';
import { logger } from '../../utils/logger.js';
import { cookieStore } from '../../cookie/store.js';
import { XHS_SELECTORS } from './selectors.js';
@@ -16,6 +18,9 @@ const EXPLORE_URL = 'https://www.xiaohongshu.com/explore';
/** How long to wait for the user to scan the QR code (4 minutes). */
const QR_SCAN_TIMEOUT_MS = 4 * 60 * 1000;
/** Safety timeout for the login browser window (5 minutes). */
const LOGIN_BROWSER_SAFETY_MS = 5 * 60 * 1000;
const log = logger.child({ module: 'xhs-login' });
// ---------------------------------------------------------------------------
@@ -55,26 +60,68 @@ export async function checkLoginStatus(page: Page): Promise<LoginStatus> {
// ---------------------------------------------------------------------------
/**
* Open the explore page, trigger the login modal if needed, and extract the
* QR code image data.
* Open the explore page in a **non-headless** browser, wait for the login
* modal QR code, and return the image data.
*
* Because the user must scan the QR code with their phone (which takes an
* indeterminate amount of time), this function uses `acquirePage` instead of
* `withPage`. A fire-and-forget background task waits for the scan to
* complete, saves cookies, and releases the page.
* Why non-headless? XHS detects unauthenticated headless requests and
* redirects them to a security-restriction error page (code 300012:
* "IP存在风险"). Once the user scans the QR code and cookies are saved,
* all subsequent operations can use the normal headless BrowserManager.
*
* @param browser - The shared BrowserManager instance.
* A fire-and-forget background task waits for the scan to complete, saves
* cookies to disk, clears the main BrowserManager context (so it reloads
* the fresh cookies on next use), then closes the login browser.
*
* @param browser - The shared BrowserManager instance (used to clear its
* cached context after login so it picks up new cookies).
* @returns QR code data or an indication that the user is already logged in.
*/
export async function getLoginQRCode(
browser: BrowserManager,
): Promise<QRCodeResult> {
const { page, release } = await browser.acquirePage(PLATFORM);
log.info('Launching non-headless browser for QR code login');
const loginBrowser = await chromium.launch({
headless: false,
...(config.browserBin ? { executablePath: config.browserBin } : {}),
args: [
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-dev-shm-usage',
'--disable-gpu',
],
});
const ctx = await loginBrowser.newContext();
const page = await ctx.newPage();
let released = false;
const release = async (): Promise<void> => {
if (released) return;
released = true;
clearTimeout(safetyTimer);
await loginBrowser.close().catch((err: unknown) => {
log.warn({ err }, 'Failed to close login browser');
});
};
const safetyTimer = setTimeout(() => {
if (!released) {
log.warn('Login browser safety timeout: closing after 5 minutes');
void release();
}
}, LOGIN_BROWSER_SAFETY_MS);
if (typeof safetyTimer === 'object' && 'unref' in safetyTimer) {
safetyTimer.unref();
}
try {
await page.goto(EXPLORE_URL, { waitUntil: 'domcontentloaded' });
log.debug({ url: page.url() }, 'Login browser navigated');
// Check whether the user is already logged in.
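// Check whether the session is already logged in.
// NOTE(review): `ctx` comes from a bare `newContext()` with no storageState, so saved cookies are not loaded into this login browser — confirm this check can ever succeed.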
const alreadyLoggedIn = await page
.waitForSelector(XHS_SELECTORS.login.loggedInIndicator, { timeout: 3_000 })
.then(() => true)
@@ -85,9 +132,10 @@ export async function getLoginQRCode(
return { qrcodeData: '', alreadyLoggedIn: true, timeout: '0' };
}
// If the QR code is not visible yet, click the login button to open it.
// The login modal auto-appears after a few seconds (no button click needed).
// Wait up to 20s; if still absent try the login button as a fallback.
const qrVisible = await page
.waitForSelector(XHS_SELECTORS.login.qrCodeImage, { timeout: 3_000 })
.waitForSelector(XHS_SELECTORS.login.qrCodeImage, { timeout: 20_000 })
.then(() => true)
.catch(() => false);
@@ -114,11 +162,8 @@ export async function getLoginQRCode(
throw new Error('QR code image src attribute is empty');
}
// Fire-and-forget: wait for the user to scan the QR code in the
// background. On success, save cookies and release the page. On
// failure or timeout, just release the page. The `.catch()` ensures
// no unhandled rejection escapes.
waitForLoginAndRelease(page, browser, release).catch((err: unknown) => {
// Fire-and-forget: wait for the user to scan the QR code in the background.
waitForLoginAndRelease(page, ctx, browser, release).catch((err: unknown) => {
log.error({ err }, 'Login wait flow encountered an unexpected error');
});
@@ -128,8 +173,6 @@ export async function getLoginQRCode(
timeout: '4m',
};
} catch (err) {
// If anything goes wrong before we hand off to the background task,
// make sure the page is released.
await release();
throw err;
}
@@ -155,15 +198,19 @@ export async function deleteCookies(browser: BrowserManager): Promise<void> {
/**
* Background task that waits for the logged-in indicator to appear (meaning
* the user has scanned the QR code). On success it persists cookies. The
* page is released in all cases (success, timeout, error) via `finally`.
* the user has scanned the QR code).
*
* `release` is idempotent (guaranteed by BrowserManager.acquirePage), so
* even if the safety-net timer inside acquirePage fires concurrently, there
* is no double-close.
* On success:
* 1. Extracts storageState from the login browser context.
* 2. Saves cookies to disk via CookieStore.
* 3. Clears the main BrowserManager's cached context so the next
* headless operation creates a fresh context that reloads the cookies.
*
* The login browser is closed in all cases (success, timeout, error).
*/
async function waitForLoginAndRelease(
page: Page,
ctx: BrowserContext,
browser: BrowserManager,
release: () => Promise<void>,
): Promise<void> {
@@ -173,10 +220,17 @@ async function waitForLoginAndRelease(
});
log.info('QR code scanned — login detected, saving cookies');
await browser.saveCookies(PLATFORM);
const state = await ctx.storageState();
await cookieStore.save(PLATFORM, state);
// Clear the headless BrowserManager's cached context so the next
// withPage() call creates a new one that restores the saved cookies.
await browser.clearContext(PLATFORM);
log.info('Cookies saved and headless context refreshed');
} catch {
// Timeout or page closed — not an error, just means the user did not
// scan in time (or the page was released by the safety timer).
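// Timeout or browser closed is the expected case here (the user did not scan in time).
// NOTE(review): this bare catch also swallows failures from storageState(), cookieStore.save() and clearContext() — consider logging the caught error.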
log.debug('Login wait ended without successful scan');
} finally {
await release();
+2 -2
View File
@@ -4,8 +4,8 @@
export const XHS_SELECTORS = {
login: {
/** QR code image on the login modal / page. */
qrCodeImage: '.login-container .qrcode-img',
/** QR code image on the login modal (auto-appears after a few seconds). */
qrCodeImage: 'img.qrcode-img',
/** Element present only when the user is logged in (sidebar channel link). */
loggedInIndicator: '.user .link-wrapper .channel',
/** The "login" button that opens the QR code modal (if not already shown). */