56d5a6de96
- 搜索页笔记 href 格式为 /search_result/<id>,修正正则以兼容 /explore/ 和 /search_result/ - 用户主页笔记 href 格式为 /user/profile/<userId>/<noteId>,扩展正则并取正确捕获组 - 用户主页访问前先 warm-up 到 /explore,绕过 XHS headless IP 风控(code 300012) - xsec_source 改为 pc_feed 以匹配用户从 feed 页获取的 token 类型 - 新增 debug-search.ts / debug-qrcode.ts / debug-profile.ts 诊断脚本
111 lines
3.8 KiB
TypeScript
111 lines
3.8 KiB
TypeScript
/**
 * Standalone diagnostic script for the search page.
 *
 * Usage:
 *   npx tsx scripts/debug-search.ts [keyword]
 */
|
|
|
|
import { readFileSync } from 'node:fs';
import { homedir } from 'node:os';
import { join } from 'node:path';

import { chromium } from 'rebrowser-playwright';
|
|
|
|
// Search keyword: first CLI argument, falling back to '美食' when none is given.
const keyword = process.argv[2] ?? '美食';

// Search-result URL for the keyword; the keyword is percent-encoded so that
// non-ASCII text and reserved characters survive in the query string.
const SEARCH_URL = `https://www.xiaohongshu.com/search_result?keyword=${encodeURIComponent(keyword)}`;

// Location of the saved session file. os.homedir() is used instead of
// process.env.HOME because HOME is not set on every platform (e.g. Windows),
// which previously produced a path starting with the literal text "undefined".
const COOKIE_FILE = join(homedir(), '.social-mcp', 'xiaohongshu', 'cookies.json');
|
|
|
|
/**
 * Runs the search diagnostic end to end.
 *
 * Steps, in order:
 *  1. Load the saved session from COOKIE_FILE (optional).
 *  2. Launch headless Chromium and open SEARCH_URL.
 *  3. Print title/URL right after navigation and again after a 5 s wait.
 *  4. Dump the structure of `window.__INITIAL_STATE__` and list non-empty arrays in it.
 *  5. Report how many elements match each candidate DOM selector.
 *  6. Save a screenshot to /tmp/xhs-search-debug.png.
 *
 * The browser is closed in a `finally` block, so it is shut down even when a
 * step throws; the error itself still propagates to the caller.
 */
async function main(): Promise<void> {
  const storageState = loadStorageState();

  const browser = await chromium.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage', '--disable-gpu'],
  });

  try {
    // The parsed JSON is handed to Playwright as-is; its exact shape is not validated here.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const ctx = await browser.newContext(storageState ? { storageState: storageState as any } : {});
    const page = await ctx.newPage();

    console.log(`\nNavigating to: ${SEARCH_URL}`);
    await page.goto(SEARCH_URL, { waitUntil: 'domcontentloaded' });
    console.log(`title: "${await page.title()}"`);
    console.log(`url: ${page.url()}`);

    // Fixed delay so client-side rendering/redirects can happen before probing.
    console.log('\nWaiting 5s for SPA...');
    await page.waitForTimeout(5000);
    console.log(`title after wait: "${await page.title()}"`);
    console.log(`url after wait: ${page.url()}`);

    // --- __INITIAL_STATE__ ---
    console.log('\n--- __INITIAL_STATE__ top-level keys ---');
    // An evaluation failure is treated the same as a missing state object.
    const state: unknown = await page.evaluate('window.__INITIAL_STATE__').catch(() => null);
    if (!isRecord(state)) {
      console.log(' __INITIAL_STATE__ not found!');
    } else {
      console.log(` keys: ${Object.keys(state).join(', ')}`);

      // Print one line per top-level key describing the types of its fields.
      for (const [key, value] of Object.entries(state)) {
        if (isRecord(value)) {
          const fields = Object.entries(value).map(([name, field]) => `${name}: ${describeType(field)}`);
          console.log(` [${key}]: { ${fields.join(', ')} }`);
        } else {
          console.log(` [${key}]: ${typeof value}`);
        }
      }

      // Try to find feeds/notes arrays
      console.log('\n--- Looking for feed/note arrays in __INITIAL_STATE__ ---');
      findArrays(state, '');
    }

    // --- DOM selectors ---
    console.log('\n--- DOM selector probe ---');
    const selectors = [
      '.feeds-container .note-item',
      '.note-item',
      '#global-search-result-container',
      '.search-result',
      '.result-container',
      '[class*="note"]',
      '[class*="feed"]',
      '[class*="result"]',
    ];
    for (const sel of selectors) {
      const count = await page.locator(sel).count();
      console.log(count > 0 ? ` ✓ [${count}] ${sel}` : ` ✗ [0] ${sel}`);
    }

    // Screenshot
    const screenshotPath = '/tmp/xhs-search-debug.png';
    await page.screenshot({ path: screenshotPath });
    console.log(`\nScreenshot → ${screenshotPath}`);
  } finally {
    // Always release the browser, including on failure paths.
    await browser.close();
  }
}

/** Returns true for any non-null value whose `typeof` is 'object' (arrays included). */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null;
}

/** Short type label for a value: `Array(n)` for arrays, otherwise its `typeof`. */
function describeType(value: unknown): string {
  return Array.isArray(value) ? `Array(${value.length})` : typeof value;
}

/**
 * Reads and parses COOKIE_FILE.
 *
 * Loading is best-effort: every failure is logged and results in `undefined`
 * so the script continues without a session. A missing file is reported
 * separately from other failures (unreadable file, malformed JSON, non-object
 * content) so that a broken session file is not mistaken for an absent one.
 *
 * @returns the parsed JSON object, or `undefined` when it could not be loaded.
 */
function loadStorageState(): object | undefined {
  let parsed: unknown;
  try {
    parsed = JSON.parse(readFileSync(COOKIE_FILE, 'utf-8'));
  } catch (err: unknown) {
    if (isRecord(err) && err.code === 'ENOENT') {
      console.log('No cookies found — running without session');
    } else {
      const reason = err instanceof Error ? err.message : String(err);
      console.log(`Could not load cookies from ${COOKIE_FILE}: ${reason} — running without session`);
    }
    return undefined;
  }

  if (!isRecord(parsed)) {
    console.log(`Ignoring ${COOKIE_FILE}: expected a JSON object — running without session`);
    return undefined;
  }

  const cookieCount = Array.isArray(parsed.cookies) ? parsed.cookies.length : 0;
  console.log(`Loaded cookies (${cookieCount} cookies)`);
  return parsed;
}

/**
 * Walks `obj` and logs every non-empty array found among its properties,
 * together with up to six keys of the array's first element (or that
 * element's type when it is not an object).
 *
 * Arrays are reported but not descended into; plain objects are recursed
 * into. Recursion stops once `depth` exceeds 3.
 *
 * @param obj   value to inspect; anything that is not a non-null object is ignored
 * @param path  dotted path of `obj` from the root, '' for the root itself
 * @param depth current recursion depth (callers normally omit it)
 */
function findArrays(obj: unknown, path: string, depth = 0): void {
  if (depth > 3) return;
  if (!isRecord(obj)) return;

  for (const [k, v] of Object.entries(obj)) {
    const fullPath = path ? `${path}.${k}` : k;
    if (Array.isArray(v)) {
      if (v.length === 0) continue;
      const sample: unknown = v[0];
      // typeof null is 'object', so null must be excluded before Object.keys
      // (which throws on null).
      let sampleKeys: string;
      if (isRecord(sample)) {
        sampleKeys = Object.keys(sample).slice(0, 6).join(', ');
      } else {
        sampleKeys = sample === null ? 'null' : typeof sample;
      }
      console.log(` Array found: ${fullPath} (length=${v.length}) sample keys: [${sampleKeys}]`);
    } else if (isRecord(v)) {
      findArrays(v, fullPath, depth + 1);
    }
  }
}
|
|
|
|
// Script entry point: run the diagnostic; on any rejection print the error
// and terminate with exit status 1.
main().catch((err: unknown) => {
  console.error(err);
  process.exit(1);
});
|