56d5a6de96
- 搜索页笔记 href 格式为 /search_result/<id>,修正正则以兼容 /explore/ 和 /search_result/ - 用户主页笔记 href 格式为 /user/profile/<userId>/<noteId>,扩展正则并取正确捕获组 - 用户主页访问前先 warm-up 到 /explore,绕过 XHS headless IP 风控(code 300012) - xsec_source 改为 pc_feed 以匹配用户从 feed 页获取的 token 类型 - 新增 debug-search.ts / debug-qrcode.ts / debug-profile.ts 诊断脚本
107 lines
3.4 KiB
TypeScript
107 lines
3.4 KiB
TypeScript
/**
 * Standalone diagnostic script — run with:
 *   npx tsx scripts/debug-qrcode.ts
 *
 * Opens the XHS explore page in a headed browser (`headless: false`) and dumps:
 *   - page title & URL after navigation
 *   - all element counts for candidate selectors
 *   - screenshot saved to /tmp/xhs-debug.png
 */
|
|
|
|
import { chromium } from 'rebrowser-playwright';
|
|
import { writeFileSync } from 'node:fs';
|
|
|
|
/** Page the script navigates to before probing selectors. */
const EXPLORE_URL = 'https://www.xiaohongshu.com/explore';

/** Fixed delay, in milliseconds, given to the SPA to settle before probing (15s). */
const WAIT_MS = 15_000;
|
|
|
|
/**
 * CSS selectors whose match counts are reported by the probe loop, in the
 * order they are printed. The list is never mutated, hence `readonly`.
 */
const SELECTORS_TO_PROBE: readonly string[] = [
  // Current (fixed)
  'img.qrcode-img',
  '.qrcode-img',
  // Original (broken)
  '.login-container .qrcode-img',
  // Login button candidates
  '.login-btn',
  'button.login-btn',
  // Logged-in indicator candidates
  '.user .link-wrapper .channel',
  // Modal/container
  '.container',
  'div.container',
  // QR area
  '.code-area',
  '.qrcode',
  // Phone input (also in modal)
  'input[name="xhs-pc-web-phone"]',
];
|
|
|
|
/**
 * Runs the diagnostic: launches Chromium with `headless: false`, navigates to
 * EXPLORE_URL, waits WAIT_MS for the SPA to render, then prints to stdout:
 *   - page title and URL (right after navigation and again after the wait)
 *   - the match count for every selector in SELECTORS_TO_PROBE
 *   - class and truncated src of every `<img>` whose src is a `data:` URI
 *   - the first 500 characters of the first `.qrcode` element's outer HTML
 * and saves a viewport screenshot to /tmp/xhs-debug.png.
 *
 * The browser is closed in a `finally` block so it does not stay open when
 * navigation, probing or the screenshot throws; the error still propagates
 * to the caller.
 */
async function main(): Promise<void> {
  console.log('Launching browser (headless: false)...');
  const browser = await chromium.launch({
    headless: false,
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage', '--disable-gpu'],
  });

  try {
    const ctx = await browser.newContext();
    const page = await ctx.newPage();

    console.log(`Navigating to ${EXPLORE_URL} ...`);
    const t0 = Date.now();
    await page.goto(EXPLORE_URL, { waitUntil: 'domcontentloaded' });
    console.log(`  domcontentloaded in ${Date.now() - t0}ms`);
    console.log(`  title: "${await page.title()}"`);
    console.log(`  url:   ${page.url()}`);

    // Fixed sleep rather than a selector wait: the point of the script is to
    // find out which selectors exist, so there is nothing reliable to wait on.
    console.log(`\nWaiting ${WAIT_MS / 1000}s for SPA to render...`);
    await page.waitForTimeout(WAIT_MS);
    console.log(`  title after wait: "${await page.title()}"`);
    console.log(`  url after wait:   ${page.url()}`);

    console.log('\n--- Selector probe results ---');
    for (const sel of SELECTORS_TO_PROBE) {
      const count = await page.locator(sel).count();
      const marker = count > 0 ? '✓' : '✗';
      console.log(`  ${marker} [${count}]  ${sel}`);
      if (count > 0 && sel.includes('qrcode')) {
        // Print src attribute if it's an img
        try {
          const src = await page.locator(sel).first().getAttribute('src');
          const preview = src ? src.slice(0, 60) + '...' : '(null)';
          console.log(`       src: ${preview}`);
        } catch {
          // Best-effort: a failed attribute read must not abort the probe loop.
        }
      }
    }

    // Dump all img srcs that look like QR codes (data URIs)
    console.log('\n--- All <img> with data: src on page ---');
    const imgs = await page.locator('img[src^="data:"]').all();
    for (const img of imgs) {
      const cls = await img.getAttribute('class').catch(() => '');
      const src = await img.getAttribute('src').catch(() => '');
      console.log(`  class="${cls}" src="${src?.slice(0, 80)}..."`);
    }

    // Save screenshot (viewport only, not the full scrollable page)
    const screenshotPath = '/tmp/xhs-debug.png';
    await page.screenshot({ path: screenshotPath, fullPage: false });
    console.log(`\nScreenshot saved → ${screenshotPath}`);

    // Also dump page HTML around any element matching qrcode
    console.log('\n--- Outer HTML of .qrcode (if found) ---');
    const qrDiv = page.locator('.qrcode').first();
    if (await qrDiv.count() > 0) {
      const html = await qrDiv.evaluate((el: Element) => el.outerHTML.slice(0, 500));
      console.log(html);
    } else {
      console.log('  .qrcode not found');
    }
  } finally {
    // Always release the browser, including on failure paths.
    await browser.close();
  }

  console.log('\nDone.');
}
|
|
|
|
// Script entry point: run the diagnostic and, on any unhandled failure, log
// it and exit with a non-zero status so callers can detect the error.
main().catch((err: unknown) => {
  console.error('Fatal:', err);
  process.exit(1);
});
|