修复搜索和用户主页的笔记 ID 提取及风控绕过问题
- 搜索页笔记 href 格式为 /search_result/<id>,修正正则以兼容 /explore/ 和 /search_result/
- 用户主页笔记 href 格式为 /user/profile/<userId>/<noteId>,扩展正则并取正确捕获组
- 用户主页访问前先 warm-up 到 /explore,绕过 XHS headless IP 风控(code 300012)
- xsec_source 改为 pc_feed 以匹配用户从 feed 页获取的 token 类型
- 新增 debug-search.ts / debug-qrcode.ts / debug-profile.ts 诊断脚本
This commit is contained in:
@@ -0,0 +1,106 @@
|
||||
/**
 * Standalone diagnostic script — run with:
 *   npx tsx scripts/debug-qrcode.ts
 *
 * Opens the XHS explore page in a headed (non-headless) browser and dumps:
 *   - page title & URL after navigation
 *   - all element counts for candidate selectors
 *   - a src preview of every <img> whose src is a data: URI
 *   - the outer HTML of the first .qrcode element, if any
 *   - screenshot saved to /tmp/xhs-debug.png
 */
|
||||
|
||||
import { chromium } from 'rebrowser-playwright';
|
||||
import { writeFileSync } from 'node:fs';
|
||||
|
||||
/** Page that the diagnostic run navigates to. */
const EXPLORE_URL = 'https://www.xiaohongshu.com/explore';

/** Fixed delay, in milliseconds, given to the SPA to settle before probing (15s). */
const WAIT_MS = 15_000;

/**
 * CSS selectors whose match counts are printed by the probe loop.
 * Grouped by the UI element each candidate is meant to locate.
 */
const SELECTORS_TO_PROBE: readonly string[] = [
  // Current (fixed)
  'img.qrcode-img',
  '.qrcode-img',
  // Original (broken)
  '.login-container .qrcode-img',
  // Login button candidates
  '.login-btn',
  'button.login-btn',
  // Logged-in indicator candidates
  '.user .link-wrapper .channel',
  // Modal/container
  '.container',
  'div.container',
  // QR area
  '.code-area',
  '.qrcode',
  // Phone input (also in modal)
  'input[name="xhs-pc-web-phone"]',
];
|
||||
|
||||
/**
 * Runs the diagnostic pass against the explore page.
 *
 * Launches Chromium with `headless: false`, navigates to `EXPLORE_URL`,
 * waits `WAIT_MS` for the page to render, then prints:
 *   - title and URL right after navigation and again after the wait
 *   - the match count for every entry in `SELECTORS_TO_PROBE`
 *   - class and src preview of every `<img>` whose src starts with `data:`
 *   - the first 500 characters of the first `.qrcode` element's outer HTML
 * and writes a viewport screenshot to `/tmp/xhs-debug.png`.
 *
 * The browser is closed in a `finally` block so a failure in any step
 * (navigation, probing, screenshot) does not leave the browser running.
 *
 * @returns A promise that resolves once the browser has been closed.
 */
async function main(): Promise<void> {
  console.log('Launching browser (headless: false)...');
  const browser = await chromium.launch({
    headless: false,
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage', '--disable-gpu'],
  });

  try {
    const ctx = await browser.newContext();
    const page = await ctx.newPage();

    console.log(`Navigating to ${EXPLORE_URL} ...`);
    const t0 = Date.now();
    await page.goto(EXPLORE_URL, { waitUntil: 'domcontentloaded' });
    console.log(` domcontentloaded in ${Date.now() - t0}ms`);
    console.log(` title: "${await page.title()}"`);
    console.log(` url: ${page.url()}`);

    // Fixed sleep rather than a selector wait: the point of the script is to
    // discover which selectors exist, so none can be relied on as a signal.
    console.log(`\nWaiting ${WAIT_MS / 1000}s for SPA to render...`);
    await page.waitForTimeout(WAIT_MS);
    console.log(` title after wait: "${await page.title()}"`);
    console.log(` url after wait: ${page.url()}`);

    console.log('\n--- Selector probe results ---');
    for (const sel of SELECTORS_TO_PROBE) {
      const count = await page.locator(sel).count();
      const marker = count > 0 ? '✓' : '✗';
      console.log(` ${marker} [${count}] ${sel}`);
      if (count > 0 && sel.includes('qrcode')) {
        // Print src attribute if it's an img
        try {
          const src = await page.locator(sel).first().getAttribute('src');
          const preview = src ? src.slice(0, 60) + '...' : '(null)';
          console.log(` src: ${preview}`);
        } catch {
          /* not an img */
        }
      }
    }

    // Dump all img srcs that look like QR codes (data URIs)
    console.log('\n--- All <img> with data: src on page ---');
    const imgs = await page.locator('img[src^="data:"]').all();
    for (const img of imgs) {
      // Best-effort reads: an element detaching mid-loop should not abort the dump.
      const cls = await img.getAttribute('class').catch(() => '');
      const src = await img.getAttribute('src').catch(() => '');
      console.log(` class="${cls}" src="${src?.slice(0, 80)}..."`);
    }

    // Save screenshot
    const screenshotPath = '/tmp/xhs-debug.png';
    await page.screenshot({ path: screenshotPath, fullPage: false });
    console.log(`\nScreenshot saved → ${screenshotPath}`);

    // Also dump page HTML around any element matching qrcode
    console.log('\n--- Outer HTML of .qrcode (if found) ---');
    const qrDiv = page.locator('.qrcode').first();
    if ((await qrDiv.count()) > 0) {
      const html = await qrDiv.evaluate((el: Element) => el.outerHTML.slice(0, 500));
      console.log(html);
    } else {
      console.log(' .qrcode not found');
    }
  } finally {
    // Always release the browser, including when a step above throws.
    await browser.close();
  }

  console.log('\nDone.');
}
|
||||
|
||||
// Script entry point: run the diagnostics and exit non-zero on any failure.
main().catch((err: unknown) => {
  console.error('Fatal:', err);
  process.exit(1);
});
|
||||
Reference in New Issue
Block a user