fix(xhs): 登录二维码流程改用非无头浏览器绕过IP风控

XHS 会对未登录的无头浏览器请求触发风控(code 300012:IP存在风险),
并将页面重定向到安全限制页,导致二维码选择器永远匹配不到。

- getLoginQRCode 单独启动 headless:false 浏览器,登录弹框可正常加载
- 扫码成功后直接从登录 context 提取 storageState 保存到磁盘
- 调用 browser.clearContext(PLATFORM) 清除主 BrowserManager 的缓存 context,使其下次使用时重新加载新 cookies
- waitForLoginAndRelease 新增 BrowserContext 参数,并在所有情况下(成功、超时、出错)关闭登录浏览器
- 修正 qrCodeImage 选择器:'.login-container .qrcode-img' → 'img.qrcode-img'
This commit is contained in:
2026-03-01 17:59:39 +08:00
parent 838b244929
commit 8b39520ec7
2 changed files with 83 additions and 29 deletions
+81 -27
View File
@@ -1,6 +1,8 @@
import type { Page } from 'rebrowser-playwright'; import { chromium } from 'rebrowser-playwright';
import type { Page, BrowserContext } from 'rebrowser-playwright';
import type { BrowserManager } from '../../browser/manager.js'; import type { BrowserManager } from '../../browser/manager.js';
import { config } from '../../config/index.js';
import { logger } from '../../utils/logger.js'; import { logger } from '../../utils/logger.js';
import { cookieStore } from '../../cookie/store.js'; import { cookieStore } from '../../cookie/store.js';
import { XHS_SELECTORS } from './selectors.js'; import { XHS_SELECTORS } from './selectors.js';
@@ -16,6 +18,9 @@ const EXPLORE_URL = 'https://www.xiaohongshu.com/explore';
/** How long to wait for the user to scan the QR code (4 minutes). */ /** How long to wait for the user to scan the QR code (4 minutes). */
const QR_SCAN_TIMEOUT_MS = 4 * 60 * 1000; const QR_SCAN_TIMEOUT_MS = 4 * 60 * 1000;
/** Safety timeout for the login browser window (5 minutes). */
const LOGIN_BROWSER_SAFETY_MS = 5 * 60 * 1000;
const log = logger.child({ module: 'xhs-login' }); const log = logger.child({ module: 'xhs-login' });
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
@@ -55,26 +60,68 @@ export async function checkLoginStatus(page: Page): Promise<LoginStatus> {
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
/** /**
* Open the explore page, trigger the login modal if needed, and extract the * Open the explore page in a **non-headless** browser, wait for the login
* QR code image data. * modal QR code, and return the image data.
* *
* Because the user must scan the QR code with their phone (which takes an * Why non-headless? XHS detects unauthenticated headless requests and
* indeterminate amount of time), this function uses `acquirePage` instead of * redirects them to a security-restriction error page (code 300012:
* `withPage`. A fire-and-forget background task waits for the scan to * "IP存在风险"). Once the user scans the QR code and cookies are saved,
* complete, saves cookies, and releases the page. * all subsequent operations can use the normal headless BrowserManager.
* *
* @param browser - The shared BrowserManager instance. * A fire-and-forget background task waits for the scan to complete, saves
* cookies to disk, clears the main BrowserManager context (so it reloads
* the fresh cookies on next use), then closes the login browser.
*
* @param browser - The shared BrowserManager instance (used to clear its
* cached context after login so it picks up new cookies).
* @returns QR code data or an indication that the user is already logged in. * @returns QR code data or an indication that the user is already logged in.
*/ */
export async function getLoginQRCode( export async function getLoginQRCode(
browser: BrowserManager, browser: BrowserManager,
): Promise<QRCodeResult> { ): Promise<QRCodeResult> {
const { page, release } = await browser.acquirePage(PLATFORM); log.info('Launching non-headless browser for QR code login');
const loginBrowser = await chromium.launch({
headless: false,
...(config.browserBin ? { executablePath: config.browserBin } : {}),
args: [
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-dev-shm-usage',
'--disable-gpu',
],
});
const ctx = await loginBrowser.newContext();
const page = await ctx.newPage();
let released = false;
const release = async (): Promise<void> => {
if (released) return;
released = true;
clearTimeout(safetyTimer);
await loginBrowser.close().catch((err: unknown) => {
log.warn({ err }, 'Failed to close login browser');
});
};
const safetyTimer = setTimeout(() => {
if (!released) {
log.warn('Login browser safety timeout: closing after 5 minutes');
void release();
}
}, LOGIN_BROWSER_SAFETY_MS);
if (typeof safetyTimer === 'object' && 'unref' in safetyTimer) {
safetyTimer.unref();
}
try { try {
await page.goto(EXPLORE_URL, { waitUntil: 'domcontentloaded' }); await page.goto(EXPLORE_URL, { waitUntil: 'domcontentloaded' });
log.debug({ url: page.url() }, 'Login browser navigated');
// Check whether the user is already logged in. // Check whether the user is already logged in via saved cookies.
const alreadyLoggedIn = await page const alreadyLoggedIn = await page
.waitForSelector(XHS_SELECTORS.login.loggedInIndicator, { timeout: 3_000 }) .waitForSelector(XHS_SELECTORS.login.loggedInIndicator, { timeout: 3_000 })
.then(() => true) .then(() => true)
@@ -85,9 +132,10 @@ export async function getLoginQRCode(
return { qrcodeData: '', alreadyLoggedIn: true, timeout: '0' }; return { qrcodeData: '', alreadyLoggedIn: true, timeout: '0' };
} }
// If the QR code is not visible yet, click the login button to open it. // The login modal auto-appears after a few seconds (no button click needed).
// Wait up to 20s; if still absent try the login button as a fallback.
const qrVisible = await page const qrVisible = await page
.waitForSelector(XHS_SELECTORS.login.qrCodeImage, { timeout: 3_000 }) .waitForSelector(XHS_SELECTORS.login.qrCodeImage, { timeout: 20_000 })
.then(() => true) .then(() => true)
.catch(() => false); .catch(() => false);
@@ -114,11 +162,8 @@ export async function getLoginQRCode(
throw new Error('QR code image src attribute is empty'); throw new Error('QR code image src attribute is empty');
} }
// Fire-and-forget: wait for the user to scan the QR code in the // Fire-and-forget: wait for the user to scan the QR code in the background.
// background. On success, save cookies and release the page. On waitForLoginAndRelease(page, ctx, browser, release).catch((err: unknown) => {
// failure or timeout, just release the page. The `.catch()` ensures
// no unhandled rejection escapes.
waitForLoginAndRelease(page, browser, release).catch((err: unknown) => {
log.error({ err }, 'Login wait flow encountered an unexpected error'); log.error({ err }, 'Login wait flow encountered an unexpected error');
}); });
@@ -128,8 +173,6 @@ export async function getLoginQRCode(
timeout: '4m', timeout: '4m',
}; };
} catch (err) { } catch (err) {
// If anything goes wrong before we hand off to the background task,
// make sure the page is released.
await release(); await release();
throw err; throw err;
} }
@@ -155,15 +198,19 @@ export async function deleteCookies(browser: BrowserManager): Promise<void> {
/** /**
* Background task that waits for the logged-in indicator to appear (meaning * Background task that waits for the logged-in indicator to appear (meaning
* the user has scanned the QR code). On success it persists cookies. The * the user has scanned the QR code).
* page is released in all cases (success, timeout, error) via `finally`.
* *
* `release` is idempotent (guaranteed by BrowserManager.acquirePage), so * On success:
* even if the safety-net timer inside acquirePage fires concurrently, there * 1. Extracts storageState from the login browser context.
* is no double-close. * 2. Saves cookies to disk via CookieStore.
* 3. Clears the main BrowserManager's cached context so the next
* headless operation creates a fresh context that reloads the cookies.
*
* The login browser is closed in all cases (success, timeout, error).
*/ */
async function waitForLoginAndRelease( async function waitForLoginAndRelease(
page: Page, page: Page,
ctx: BrowserContext,
browser: BrowserManager, browser: BrowserManager,
release: () => Promise<void>, release: () => Promise<void>,
): Promise<void> { ): Promise<void> {
@@ -173,10 +220,17 @@ async function waitForLoginAndRelease(
}); });
log.info('QR code scanned — login detected, saving cookies'); log.info('QR code scanned — login detected, saving cookies');
await browser.saveCookies(PLATFORM);
const state = await ctx.storageState();
await cookieStore.save(PLATFORM, state);
// Clear the headless BrowserManager's cached context so the next
// withPage() call creates a new one that restores the saved cookies.
await browser.clearContext(PLATFORM);
log.info('Cookies saved and headless context refreshed');
} catch { } catch {
// Timeout or page closed — not an error, just means the user did not // Timeout or browser closed — not an error.
// scan in time (or the page was released by the safety timer).
log.debug('Login wait ended without successful scan'); log.debug('Login wait ended without successful scan');
} finally { } finally {
await release(); await release();
+2 -2
View File
@@ -4,8 +4,8 @@
export const XHS_SELECTORS = { export const XHS_SELECTORS = {
login: { login: {
/** QR code image on the login modal / page. */ /** QR code image on the login modal (auto-appears after a few seconds). */
qrCodeImage: '.login-container .qrcode-img', qrCodeImage: 'img.qrcode-img',
/** Element present only when the user is logged in (sidebar channel link). */ /** Element present only when the user is logged in (sidebar channel link). */
loggedInIndicator: '.user .link-wrapper .channel', loggedInIndicator: '.user .link-wrapper .channel',
/** The "login" button that opens the QR code modal (if not already shown). */ /** The "login" button that opens the QR code modal (if not already shown). */