Files
kurihada 56d5a6de96 修复搜索和用户主页的笔记 ID 提取及风控绕过问题
- 搜索页笔记 href 格式为 /search_result/<id>,修正正则以兼容 /explore/ 和 /search_result/
- 用户主页笔记 href 格式为 /user/profile/<userId>/<noteId>,扩展正则并取正确捕获组
- 用户主页访问前先 warm-up 到 /explore,绕过 XHS headless IP 风控(code 300012)
- xsec_source 改为 pc_feed 以匹配用户从 feed 页获取的 token 类型
- 新增 debug-search.ts / debug-qrcode.ts / debug-profile.ts 诊断脚本
2026-03-01 22:44:45 +08:00

107 lines
3.4 KiB
TypeScript

/**
* Standalone diagnostic script — run with:
* npx tsx scripts/debug-qrcode.ts
*
* Opens the XHS explore page in a headed (non-headless) browser and dumps:
* - page title & URL after navigation
* - all element counts for candidate selectors
* - screenshot saved to /tmp/xhs-debug.png
*/
import { chromium } from 'rebrowser-playwright';
import { writeFileSync } from 'node:fs';
/** Page the script navigates to before probing. */
const EXPLORE_URL = 'https://www.xiaohongshu.com/explore';

/** Fixed delay, in milliseconds (15 seconds), given to the SPA to settle. */
const WAIT_MS = 15 * 1000;

/** CSS selectors whose match counts are reported, in the order they are printed. */
const SELECTORS_TO_PROBE = [
  // QR image: selectors currently in use (the fixed variants)
  'img.qrcode-img',
  '.qrcode-img',

  // QR image: the original selector that was found to be broken
  '.login-container .qrcode-img',

  // Candidates for the login button
  '.login-btn',
  'button.login-btn',

  // Candidate marker for an already-logged-in session
  '.user .link-wrapper .channel',

  // Modal / container wrappers
  '.container',
  'div.container',

  // QR code area
  '.code-area',
  '.qrcode',

  // Phone number input (also rendered inside the modal)
  'input[name="xhs-pc-web-phone"]',
];
/**
 * Runs the diagnostic: launches Chromium with `headless: false`, navigates to
 * EXPLORE_URL, waits WAIT_MS, then prints
 *   - the page title and URL before and after the wait,
 *   - the match count for every selector in SELECTORS_TO_PROBE,
 *   - class/src previews of every <img> whose src starts with "data:",
 *   - the first 500 characters of the first `.qrcode` element's outer HTML,
 * and saves a viewport screenshot to /tmp/xhs-debug.png.
 *
 * The browser is closed in a `finally` block so a failing step (navigation
 * timeout, screenshot error, ...) does not leave the browser running.
 *
 * @returns A promise that resolves once the browser is closed; it rejects
 *          with whatever error a navigation/probe/screenshot step raised.
 */
async function main(): Promise<void> {
  console.log('Launching browser (headless: false)...');
  const browser = await chromium.launch({
    headless: false,
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage', '--disable-gpu'],
  });

  try {
    const ctx = await browser.newContext();
    const page = await ctx.newPage();

    console.log(`Navigating to ${EXPLORE_URL} ...`);
    const t0 = Date.now();
    await page.goto(EXPLORE_URL, { waitUntil: 'domcontentloaded' });
    console.log(` domcontentloaded in ${Date.now() - t0}ms`);
    console.log(` title: "${await page.title()}"`);
    console.log(` url: ${page.url()}`);

    console.log(`\nWaiting ${WAIT_MS / 1000}s for SPA to render...`);
    await page.waitForTimeout(WAIT_MS);
    console.log(` title after wait: "${await page.title()}"`);
    console.log(` url after wait: ${page.url()}`);

    console.log('\n--- Selector probe results ---');
    for (const sel of SELECTORS_TO_PROBE) {
      const count = await page.locator(sel).count();
      const marker = count > 0 ? '✓' : '✗';
      console.log(` ${marker} [${count}] ${sel}`);
      if (count > 0 && sel.includes('qrcode')) {
        // Print the src attribute; it is null when the match is not an <img>.
        try {
          const src = await page.locator(sel).first().getAttribute('src');
          const preview = src ? src.slice(0, 60) + '...' : '(null)';
          console.log(` src: ${preview}`);
        } catch (err) {
          // Best-effort: a failed attribute read must not abort the remaining
          // probes, but the reason is reported instead of being swallowed.
          const reason = err instanceof Error ? err.message.split('\n')[0] : String(err);
          console.log(` src: (unavailable: ${reason})`);
        }
      }
    }

    // Dump all img srcs that look like QR codes (data URIs)
    console.log('\n--- All <img> with data: src on page ---');
    const imgs = await page.locator('img[src^="data:"]').all();
    for (const img of imgs) {
      // getAttribute resolves to null for a missing attribute; normalise to ''
      // so the output never contains the literal text "null"/"undefined".
      const cls = (await img.getAttribute('class').catch(() => '')) ?? '';
      const src = (await img.getAttribute('src').catch(() => '')) ?? '';
      console.log(` class="${cls}" src="${src.slice(0, 80)}..."`);
    }

    // Save screenshot (viewport only, not the full scrollable page)
    const screenshotPath = '/tmp/xhs-debug.png';
    await page.screenshot({ path: screenshotPath, fullPage: false });
    console.log(`\nScreenshot saved → ${screenshotPath}`);

    // Also dump page HTML around any element matching qrcode
    console.log('\n--- Outer HTML of .qrcode (if found) ---');
    const qrDiv = page.locator('.qrcode').first();
    if ((await qrDiv.count()) > 0) {
      const html = await qrDiv.evaluate((el: Element) => el.outerHTML.slice(0, 500));
      console.log(html);
    } else {
      console.log(' .qrcode not found');
    }
  } finally {
    // Always release the browser, even when one of the steps above throws.
    await browser.close();
  }

  console.log('\nDone.');
}
/** Logs a rejection from main() and terminates the process with exit code 1. */
function reportFatal(failure: unknown): void {
  console.error('Fatal:', failure);
  process.exit(1);
}

// Script entry point: run the diagnostic and route any failure to reportFatal.
main().catch(reportFatal);