fix(xhs): 登录二维码流程改用非无头浏览器绕过IP风控
XHS 对未登录的无头浏览器请求触发风控(code 300012:IP存在风险),并重定向到安全限制页,导致二维码选择器永远匹配不到。
- getLoginQRCode 单独启动 headless:false 浏览器,登录弹框可正常加载
- 扫码成功后直接从登录 context 提取 storageState 保存到磁盘
- 调用 browser.clearContext() 让主 BrowserManager 下次重新加载新 cookies
- waitForLoginAndRelease 改为接收 BrowserContext 并关闭登录浏览器
- 修正 qrCodeImage 选择器:'.login-container .qrcode-img' → 'img.qrcode-img'
This commit is contained in:
@@ -1,6 +1,8 @@
|
|||||||
import type { Page } from 'rebrowser-playwright';
|
import { chromium } from 'rebrowser-playwright';
|
||||||
|
import type { Page, BrowserContext } from 'rebrowser-playwright';
|
||||||
|
|
||||||
import type { BrowserManager } from '../../browser/manager.js';
|
import type { BrowserManager } from '../../browser/manager.js';
|
||||||
|
import { config } from '../../config/index.js';
|
||||||
import { logger } from '../../utils/logger.js';
|
import { logger } from '../../utils/logger.js';
|
||||||
import { cookieStore } from '../../cookie/store.js';
|
import { cookieStore } from '../../cookie/store.js';
|
||||||
import { XHS_SELECTORS } from './selectors.js';
|
import { XHS_SELECTORS } from './selectors.js';
|
||||||
@@ -16,6 +18,9 @@ const EXPLORE_URL = 'https://www.xiaohongshu.com/explore';
|
|||||||
/** How long to wait for the user to scan the QR code (4 minutes). */
|
/** How long to wait for the user to scan the QR code (4 minutes). */
|
||||||
const QR_SCAN_TIMEOUT_MS = 4 * 60 * 1000;
|
const QR_SCAN_TIMEOUT_MS = 4 * 60 * 1000;
|
||||||
|
|
||||||
|
/** Safety timeout for the login browser window (5 minutes). */
|
||||||
|
const LOGIN_BROWSER_SAFETY_MS = 5 * 60 * 1000;
|
||||||
|
|
||||||
const log = logger.child({ module: 'xhs-login' });
|
const log = logger.child({ module: 'xhs-login' });
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
@@ -55,26 +60,68 @@ export async function checkLoginStatus(page: Page): Promise<LoginStatus> {
|
|||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Open the explore page, trigger the login modal if needed, and extract the
|
* Open the explore page in a **non-headless** browser, wait for the login
|
||||||
* QR code image data.
|
* modal QR code, and return the image data.
|
||||||
*
|
*
|
||||||
* Because the user must scan the QR code with their phone (which takes an
|
* Why non-headless? XHS detects unauthenticated headless requests and
|
||||||
* indeterminate amount of time), this function uses `acquirePage` instead of
|
* redirects them to a security-restriction error page (code 300012:
|
||||||
* `withPage`. A fire-and-forget background task waits for the scan to
|
* "IP存在风险"). Once the user scans the QR code and cookies are saved,
|
||||||
* complete, saves cookies, and releases the page.
|
* all subsequent operations can use the normal headless BrowserManager.
|
||||||
*
|
*
|
||||||
* @param browser - The shared BrowserManager instance.
|
* A fire-and-forget background task waits for the scan to complete, saves
|
||||||
|
* cookies to disk, clears the main BrowserManager context (so it reloads
|
||||||
|
* the fresh cookies on next use), then closes the login browser.
|
||||||
|
*
|
||||||
|
* @param browser - The shared BrowserManager instance (used to clear its
|
||||||
|
* cached context after login so it picks up new cookies).
|
||||||
* @returns QR code data or an indication that the user is already logged in.
|
* @returns QR code data or an indication that the user is already logged in.
|
||||||
*/
|
*/
|
||||||
export async function getLoginQRCode(
|
export async function getLoginQRCode(
|
||||||
browser: BrowserManager,
|
browser: BrowserManager,
|
||||||
): Promise<QRCodeResult> {
|
): Promise<QRCodeResult> {
|
||||||
const { page, release } = await browser.acquirePage(PLATFORM);
|
log.info('Launching non-headless browser for QR code login');
|
||||||
|
|
||||||
|
const loginBrowser = await chromium.launch({
|
||||||
|
headless: false,
|
||||||
|
...(config.browserBin ? { executablePath: config.browserBin } : {}),
|
||||||
|
args: [
|
||||||
|
'--no-sandbox',
|
||||||
|
'--disable-setuid-sandbox',
|
||||||
|
'--disable-dev-shm-usage',
|
||||||
|
'--disable-gpu',
|
||||||
|
],
|
||||||
|
});
|
||||||
|
|
||||||
|
const ctx = await loginBrowser.newContext();
|
||||||
|
const page = await ctx.newPage();
|
||||||
|
|
||||||
|
let released = false;
|
||||||
|
|
||||||
|
const release = async (): Promise<void> => {
|
||||||
|
if (released) return;
|
||||||
|
released = true;
|
||||||
|
clearTimeout(safetyTimer);
|
||||||
|
await loginBrowser.close().catch((err: unknown) => {
|
||||||
|
log.warn({ err }, 'Failed to close login browser');
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
const safetyTimer = setTimeout(() => {
|
||||||
|
if (!released) {
|
||||||
|
log.warn('Login browser safety timeout: closing after 5 minutes');
|
||||||
|
void release();
|
||||||
|
}
|
||||||
|
}, LOGIN_BROWSER_SAFETY_MS);
|
||||||
|
|
||||||
|
if (typeof safetyTimer === 'object' && 'unref' in safetyTimer) {
|
||||||
|
safetyTimer.unref();
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await page.goto(EXPLORE_URL, { waitUntil: 'domcontentloaded' });
|
await page.goto(EXPLORE_URL, { waitUntil: 'domcontentloaded' });
|
||||||
|
log.debug({ url: page.url() }, 'Login browser navigated');
|
||||||
|
|
||||||
// Check whether the user is already logged in.
|
// Check whether the page already shows the logged-in indicator (the login context is created fresh, without saved cookies).
|
||||||
const alreadyLoggedIn = await page
|
const alreadyLoggedIn = await page
|
||||||
.waitForSelector(XHS_SELECTORS.login.loggedInIndicator, { timeout: 3_000 })
|
.waitForSelector(XHS_SELECTORS.login.loggedInIndicator, { timeout: 3_000 })
|
||||||
.then(() => true)
|
.then(() => true)
|
||||||
@@ -85,9 +132,10 @@ export async function getLoginQRCode(
|
|||||||
return { qrcodeData: '', alreadyLoggedIn: true, timeout: '0' };
|
return { qrcodeData: '', alreadyLoggedIn: true, timeout: '0' };
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the QR code is not visible yet, click the login button to open it.
|
// The login modal auto-appears after a few seconds (no button click needed).
|
||||||
|
// Wait up to 20s; if still absent try the login button as a fallback.
|
||||||
const qrVisible = await page
|
const qrVisible = await page
|
||||||
.waitForSelector(XHS_SELECTORS.login.qrCodeImage, { timeout: 3_000 })
|
.waitForSelector(XHS_SELECTORS.login.qrCodeImage, { timeout: 20_000 })
|
||||||
.then(() => true)
|
.then(() => true)
|
||||||
.catch(() => false);
|
.catch(() => false);
|
||||||
|
|
||||||
@@ -114,11 +162,8 @@ export async function getLoginQRCode(
|
|||||||
throw new Error('QR code image src attribute is empty');
|
throw new Error('QR code image src attribute is empty');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fire-and-forget: wait for the user to scan the QR code in the
|
// Fire-and-forget: wait for the user to scan the QR code in the background.
|
||||||
// background. On success, save cookies and release the page. On
|
waitForLoginAndRelease(page, ctx, browser, release).catch((err: unknown) => {
|
||||||
// failure or timeout, just release the page. The `.catch()` ensures
|
|
||||||
// no unhandled rejection escapes.
|
|
||||||
waitForLoginAndRelease(page, browser, release).catch((err: unknown) => {
|
|
||||||
log.error({ err }, 'Login wait flow encountered an unexpected error');
|
log.error({ err }, 'Login wait flow encountered an unexpected error');
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -128,8 +173,6 @@ export async function getLoginQRCode(
|
|||||||
timeout: '4m',
|
timeout: '4m',
|
||||||
};
|
};
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
// If anything goes wrong before we hand off to the background task,
|
|
||||||
// make sure the page is released.
|
|
||||||
await release();
|
await release();
|
||||||
throw err;
|
throw err;
|
||||||
}
|
}
|
||||||
@@ -155,15 +198,19 @@ export async function deleteCookies(browser: BrowserManager): Promise<void> {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Background task that waits for the logged-in indicator to appear (meaning
|
* Background task that waits for the logged-in indicator to appear (meaning
|
||||||
* the user has scanned the QR code). On success it persists cookies. The
|
* the user has scanned the QR code).
|
||||||
* page is released in all cases (success, timeout, error) via `finally`.
|
|
||||||
*
|
*
|
||||||
* `release` is idempotent (guaranteed by BrowserManager.acquirePage), so
|
* On success:
|
||||||
* even if the safety-net timer inside acquirePage fires concurrently, there
|
* 1. Extracts storageState from the login browser context.
|
||||||
* is no double-close.
|
* 2. Saves cookies to disk via CookieStore.
|
||||||
|
* 3. Clears the main BrowserManager's cached context so the next
|
||||||
|
* headless operation creates a fresh context that reloads the cookies.
|
||||||
|
*
|
||||||
|
* The login browser is closed in all cases (success, timeout, error).
|
||||||
*/
|
*/
|
||||||
async function waitForLoginAndRelease(
|
async function waitForLoginAndRelease(
|
||||||
page: Page,
|
page: Page,
|
||||||
|
ctx: BrowserContext,
|
||||||
browser: BrowserManager,
|
browser: BrowserManager,
|
||||||
release: () => Promise<void>,
|
release: () => Promise<void>,
|
||||||
): Promise<void> {
|
): Promise<void> {
|
||||||
@@ -173,10 +220,17 @@ async function waitForLoginAndRelease(
|
|||||||
});
|
});
|
||||||
|
|
||||||
log.info('QR code scanned — login detected, saving cookies');
|
log.info('QR code scanned — login detected, saving cookies');
|
||||||
await browser.saveCookies(PLATFORM);
|
|
||||||
|
const state = await ctx.storageState();
|
||||||
|
await cookieStore.save(PLATFORM, state);
|
||||||
|
|
||||||
|
// Clear the headless BrowserManager's cached context so the next
|
||||||
|
// withPage() call creates a new one that restores the saved cookies.
|
||||||
|
await browser.clearContext(PLATFORM);
|
||||||
|
|
||||||
|
log.info('Cookies saved and headless context refreshed');
|
||||||
} catch {
|
} catch {
|
||||||
// Timeout or page closed — not an error, just means the user did not
|
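// Timeout or browser closed — not an error. NOTE(review): this bare catch presumably also swallows failures from storageState()/cookieStore.save()/clearContext() above — confirm that is intended.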
|
||||||
// scan in time (or the page was released by the safety timer).
|
|
||||||
log.debug('Login wait ended without successful scan');
|
log.debug('Login wait ended without successful scan');
|
||||||
} finally {
|
} finally {
|
||||||
await release();
|
await release();
|
||||||
|
|||||||
@@ -4,8 +4,8 @@
|
|||||||
|
|
||||||
export const XHS_SELECTORS = {
|
export const XHS_SELECTORS = {
|
||||||
login: {
|
login: {
|
||||||
/** QR code image on the login modal / page. */
|
/** QR code image on the login modal (auto-appears after a few seconds). */
|
||||||
qrCodeImage: '.login-container .qrcode-img',
|
qrCodeImage: 'img.qrcode-img',
|
||||||
/** Element present only when the user is logged in (sidebar channel link). */
|
/** Element present only when the user is logged in (sidebar channel link). */
|
||||||
loggedInIndicator: '.user .link-wrapper .channel',
|
loggedInIndicator: '.user .link-wrapper .channel',
|
||||||
/** The "login" button that opens the QR code modal (if not already shown). */
|
/** The "login" button that opens the QR code modal (if not already shown). */
|
||||||
|
|||||||
Reference in New Issue
Block a user