Files
kurihada 56d5a6de96 修复搜索和用户主页的笔记 ID 提取及风控绕过问题
- 搜索页笔记 href 格式为 /search_result/<id>,修正正则以兼容 /explore/ 和 /search_result/
- 用户主页笔记 href 格式为 /user/profile/<userId>/<noteId>,扩展正则并取正确捕获组
- 用户主页访问前先 warm-up 到 /explore,绕过 XHS headless IP 风控(code 300012)
- xsec_source 改为 pc_feed 以匹配用户从 feed 页获取的 token 类型
- 新增 debug-search.ts / debug-qrcode.ts / debug-profile.ts 诊断脚本
2026-03-01 22:44:45 +08:00

111 lines
3.8 KiB
TypeScript

/**
* Standalone diagnostic script for search:
* npx tsx scripts/debug-search.ts [keyword]
*/
import { readFileSync } from 'node:fs';
import { homedir } from 'node:os';
import { chromium } from 'rebrowser-playwright';
/** Search keyword: the first CLI argument, or '美食' when none is supplied. */
const keyword = process.argv[2] ?? '美食';

/** Search-results URL with the keyword percent-encoded into the query string. */
const SEARCH_URL = `https://www.xiaohongshu.com/search_result?keyword=${encodeURIComponent(keyword)}`;

/**
 * Location of the saved session (storage state) file.
 *
 * `$HOME` is preferred so the path is unchanged wherever it is set; when it is
 * unset (e.g. on Windows) we fall back to `os.homedir()` instead of
 * interpolating the literal string "undefined" into the path.
 */
const COOKIE_FILE = `${process.env.HOME ?? homedir()}/.social-mcp/xiaohongshu/cookies.json`;
/** True for any non-null value whose `typeof` is 'object' (plain objects and arrays alike). */
function isObjectLike(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null;
}

/** Short tag for the structure dump: `Array(n)` for arrays, the `typeof` name otherwise. */
function describeValue(value: unknown): string {
  return Array.isArray(value) ? `Array(${value.length})` : typeof value;
}

/**
 * Summarises the first element of an array for the log: up to six of its keys
 * when it is an object, 'null' for null, and its `typeof` name otherwise.
 *
 * null is handled explicitly because `typeof null === 'object'`, and calling
 * `Object.keys(null)` would throw and abort the whole probe.
 */
function describeSample(sample: unknown): string {
  if (sample === null) return 'null';
  return isObjectLike(sample) ? Object.keys(sample).slice(0, 6).join(', ') : typeof sample;
}

/**
 * Recursively logs every non-empty array reachable from `obj`.
 *
 * @param obj   Value to scan; anything that is not a non-null object is ignored.
 * @param path  Dotted path of `obj` from the scan root ('' for the root itself).
 * @param depth Current nesting level; levels deeper than 3 are not scanned.
 */
function findArrays(obj: unknown, path: string, depth = 0): void {
  if (depth > 3 || !isObjectLike(obj)) return;
  for (const [key, value] of Object.entries(obj)) {
    const fullPath = path ? `${path}.${key}` : key;
    if (Array.isArray(value)) {
      // Arrays are reported but never descended into; empty ones are skipped.
      if (value.length > 0) {
        console.log(` Array found: ${fullPath} (length=${value.length}) sample keys: [${describeSample(value[0])}]`);
      }
    } else if (isObjectLike(value)) {
      findArrays(value, fullPath, depth + 1);
    }
  }
}

/**
 * Best-effort load of a saved storage-state JSON file.
 *
 * Never throws: a missing file, unreadable file, or malformed JSON all result in
 * `undefined` so the probe can continue without a session, but the log says
 * which of those actually happened.
 *
 * @param file Path of the JSON file to read.
 * @returns The parsed value, or `undefined` when nothing usable was loaded.
 */
function loadStorageState(file: string): object | undefined {
  try {
    const parsed: unknown = JSON.parse(readFileSync(file, 'utf-8'));
    if (!isObjectLike(parsed)) {
      console.log('Cookie file does not contain a JSON object — running without session');
      return undefined;
    }
    const cookies = parsed.cookies;
    console.log(`Loaded cookies (${Array.isArray(cookies) ? cookies.length : 0} cookies)`);
    return parsed;
  } catch (err) {
    if (isObjectLike(err) && err.code === 'ENOENT') {
      console.log('No cookies found — running without session');
    } else {
      // Malformed JSON, permission problems, etc. — surface the real reason.
      const reason = err instanceof Error ? err.message : String(err);
      console.log(`Could not load cookies (${reason}) — running without session`);
    }
    return undefined;
  }
}

/**
 * Runs the search diagnostic: opens SEARCH_URL in headless Chromium (with the
 * saved session from COOKIE_FILE when one can be loaded), waits five seconds for
 * the SPA, then logs the shape of `window.__INITIAL_STATE__`, the match count of
 * a list of candidate DOM selectors, and writes a screenshot to /tmp.
 *
 * The browser is closed even when a step fails, so no browser is left running.
 */
async function main() {
  const storageState = loadStorageState(COOKIE_FILE);

  const browser = await chromium.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage', '--disable-gpu'],
  });

  try {
    // The parsed JSON is not validated against Playwright's storage-state type,
    // hence the cast.
    const ctx = await browser.newContext(storageState ? { storageState: storageState as any } : {});
    const page = await ctx.newPage();

    console.log(`\nNavigating to: ${SEARCH_URL}`);
    await page.goto(SEARCH_URL, { waitUntil: 'domcontentloaded' });
    console.log(`title: "${await page.title()}"`);
    console.log(`url: ${page.url()}`);

    console.log('\nWaiting 5s for SPA...');
    await page.waitForTimeout(5000);
    console.log(`title after wait: "${await page.title()}"`);
    console.log(`url after wait: ${page.url()}`);

    // --- __INITIAL_STATE__ ---
    console.log('\n--- __INITIAL_STATE__ top-level keys ---');
    // An evaluation failure is treated the same as the global being absent.
    const state: unknown = await page.evaluate('window.__INITIAL_STATE__').catch(() => null);
    if (!isObjectLike(state)) {
      console.log(' __INITIAL_STATE__ not found!');
    } else {
      console.log(` keys: ${Object.keys(state).join(', ')}`);

      // Print the structure of each top-level key, one level deep.
      for (const [key, value] of Object.entries(state)) {
        if (isObjectLike(value)) {
          const fields = Object.entries(value)
            .map(([subKey, subValue]) => `${subKey}: ${describeValue(subValue)}`)
            .join(', ');
          console.log(` [${key}]: { ${fields} }`);
        } else {
          console.log(` [${key}]: ${typeof value}`);
        }
      }

      // Try to find feeds/notes arrays
      console.log('\n--- Looking for feed/note arrays in __INITIAL_STATE__ ---');
      findArrays(state, '');
    }

    // --- DOM selectors ---
    console.log('\n--- DOM selector probe ---');
    const selectors = [
      '.feeds-container .note-item',
      '.note-item',
      '#global-search-result-container',
      '.search-result',
      '.result-container',
      '[class*="note"]',
      '[class*="feed"]',
      '[class*="result"]',
    ];
    for (const sel of selectors) {
      const count = await page.locator(sel).count();
      if (count > 0) console.log(` ✓ [${count}] ${sel}`);
      else console.log(` ✗ [0] ${sel}`);
    }

    // Screenshot
    const screenshotPath = '/tmp/xhs-search-debug.png';
    await page.screenshot({ path: screenshotPath });
    console.log(`\nScreenshot → ${screenshotPath}`);
  } finally {
    // Always release the browser, including when a step above throws.
    await browser.close();
  }
}
// Script entry point: run the probe; on any rejection print the error and exit with status 1.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});