重构为Monorepo:拆分xhs/xhh应用与core包并完成双服务部署改造
This commit is contained in:
@@ -0,0 +1,29 @@
|
||||
{
|
||||
"name": "@social/xhh-mcp",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"main": "dist/main.js",
|
||||
"bin": {
|
||||
"mcp-xhh": "dist/main.js"
|
||||
},
|
||||
"scripts": {
|
||||
"build": "tsup",
|
||||
"lint": "tsc --noEmit",
|
||||
"test": "vitest run",
|
||||
"start": "PORT=${PORT:-9528} COOKIE_DIR=${COOKIE_DIR:-$HOME/.social-mcp-xhh} node dist/main.js",
|
||||
"dev": "pnpm build && pnpm start"
|
||||
},
|
||||
"dependencies": {
|
||||
"@modelcontextprotocol/sdk": "^1.27.0",
|
||||
"@social/core": "workspace:*",
|
||||
"zod": "^3.25.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^22.0.0",
|
||||
"tsup": "^8.0.0",
|
||||
"typescript": "^5.7.0",
|
||||
"vitest": "^3.0.0"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
import { startServerWithPlugins } from '@social/core/server/bootstrap.js';
|
||||
import { xiaoheihePlugin } from './platforms/xiaoheihe/index.js';
|
||||
|
||||
// Entry point: hand the xiaoheihe plugin (the only plugin in this list) to the core bootstrap helper.
startServerWithPlugins([xiaoheihePlugin]);
|
||||
|
||||
@@ -0,0 +1,187 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '@social/core/utils/logger.js';
|
||||
import { XHH_SELECTORS } from './selectors.js';
|
||||
import { detectCaptchaText } from './extractors.js';
|
||||
|
||||
// Module-level logger derived through logger.child with the binding module: 'xhh-comment'.
const log = logger.child({ module: 'xhh-comment' });
|
||||
|
||||
/**
 * Builds the xiaoheihe post detail URL for a link id.
 *
 * @param linkId - Post (link) identifier; percent-encoded before it is placed in the path.
 * @returns The absolute detail page URL.
 */
function buildDetailUrl(linkId: string): string {
  const encodedId = encodeURIComponent(linkId);
  return 'https://www.xiaoheihe.cn/app/bbs/link/' + encodedId;
}
|
||||
|
||||
/**
 * Posts a top-level comment on a post's detail page.
 *
 * Flow: navigate to the detail page, abort if the page text looks like a
 * captcha, fill the comment input, click the submit control, then try to find
 * the new comment in the DOM to report its id.
 *
 * `success: true` means the submit control was clicked; the function does not
 * confirm that the comment was accepted by the site.
 *
 * @param page - Browser page used for navigation and DOM access.
 * @param linkId - Id of the post to comment on.
 * @param content - Comment text.
 * @returns `success` plus `comment_id` when a comment node containing the start
 *          of `content` exposes an id attribute.
 * @throws Error with a `CAPTCHA_REQUIRED:` prefix when captcha text is detected.
 */
export async function postComment(
  page: Page,
  linkId: string,
  content: string,
): Promise<{ success: boolean; comment_id?: string }> {
  await page.goto(buildDetailUrl(linkId), { waitUntil: 'domcontentloaded' });
  // Fixed pause after navigation before the page text is inspected.
  await page.waitForTimeout(1_000);

  const text = await page.textContent('body').catch(() => '');
  if (text && detectCaptchaText(text)) {
    throw new Error('CAPTCHA_REQUIRED: captcha detected on comment page');
  }

  const ok = await fillCommentInput(page, content);
  if (!ok) return { success: false };

  const submitted = await clickFirstVisible(page, XHH_SELECTORS.detail.commentSubmit);
  if (!submitted) return { success: false };

  // Fixed pause so the submitted comment has a chance to appear in the list.
  await page.waitForTimeout(1_500);

  // Match on the first 24 code points of the trimmed content. Slicing by code
  // point (not UTF-16 unit) avoids cutting an emoji in half, which would leave
  // a lone surrogate that can never match the rendered text.
  const contentLike = Array.from(content.trim()).slice(0, 24).join('');
  // An empty snippet would match every node via includes('') and report an
  // unrelated comment's id, so skip the lookup entirely.
  if (!contentLike) return { success: true };

  const commentId = await page.evaluate(
    ({ selectors, contentLike }: { selectors: typeof XHH_SELECTORS; contentLike: string }) => {
      const nodes = [...document.querySelectorAll<HTMLElement>(selectors.detail.commentItem.join(', '))];
      // First comment node whose text contains the snippet.
      const hit = nodes.find((node) => node.textContent?.includes(contentLike));
      if (!hit) return '';
      return (
        hit.getAttribute('data-comment-id') ||
        hit.getAttribute('comment-id') ||
        hit.id ||
        ''
      );
    },
    { selectors: XHH_SELECTORS, contentLike },
  );

  return {
    success: true,
    ...(commentId ? { comment_id: commentId as string } : {}),
  };
}
|
||||
|
||||
/**
 * Replies to an existing comment on a post's detail page.
 *
 * Flow: navigate, abort on captcha text, locate the target comment and click
 * its reply control, fill the comment input, click submit, then try to find the
 * new reply under the target comment to report its id.
 *
 * If the target comment or its reply control cannot be found the function
 * returns `{ success: false }` without typing anything. Continuing would fill
 * the page-level input and publish a top-level comment instead of a reply.
 *
 * `success: true` means the submit control was clicked; the function does not
 * confirm that the reply was accepted by the site.
 *
 * @param page - Browser page used for navigation and DOM access.
 * @param linkId - Id of the post that holds the comment.
 * @param commentId - Id of the comment being replied to.
 * @param content - Reply text.
 * @returns `success` plus `reply_id` when a matching sub-comment node exposes an id.
 * @throws Error with a `CAPTCHA_REQUIRED:` prefix when captcha text is detected.
 */
export async function replyComment(
  page: Page,
  linkId: string,
  commentId: string,
  content: string,
): Promise<{ success: boolean; reply_id?: string }> {
  await page.goto(buildDetailUrl(linkId), { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(1_000);

  const text = await page.textContent('body').catch(() => '');
  if (text && detectCaptchaText(text)) {
    throw new Error('CAPTCHA_REQUIRED: captcha detected on reply page');
  }

  const replyOpened = await page.evaluate(
    ({ selectors, targetCommentId }: { selectors: typeof XHH_SELECTORS; targetCommentId: string }) => {
      const comments = [...document.querySelectorAll<HTMLElement>(selectors.detail.commentItem.join(', '))];
      const target = comments.find((node) => {
        const id =
          node.getAttribute('data-comment-id') ||
          node.getAttribute('comment-id') ||
          node.id ||
          '';
        if (id === targetCommentId) return true;
        // Loose fallback: the id appears anywhere in the node's markup.
        return node.outerHTML.includes(targetCommentId);
      });
      if (!target) return false;

      // A reply control is any candidate whose text contains "回复" or whose class mentions "reply".
      const replyBtn = [...target.querySelectorAll<HTMLElement>('button, [role="button"], .reply-btn, .comment-reply')]
        .find((node) => {
          const text = (node.textContent ?? '').trim();
          const cls = node.className.toString().toLowerCase();
          return text.includes('回复') || cls.includes('reply');
        });
      if (!replyBtn) return false;

      replyBtn.click();
      return true;
    },
    { selectors: XHH_SELECTORS, targetCommentId: commentId },
  );

  if (!replyOpened) {
    log.warn({ linkId, commentId }, 'reply target comment or reply button not found');
    return { success: false };
  }
  await page.waitForTimeout(500);

  const ok = await fillCommentInput(page, content);
  if (!ok) return { success: false };

  const submitted = await clickFirstVisible(page, XHH_SELECTORS.detail.commentSubmit);
  if (!submitted) return { success: false };

  await page.waitForTimeout(1_500);

  // First 24 code points of the trimmed content; slicing by code point avoids
  // producing a lone surrogate that could never match the rendered text.
  const contentLike = Array.from(content.trim()).slice(0, 24).join('');
  // An empty snippet would match every reply via includes(''), so skip the lookup.
  if (!contentLike) return { success: true };

  const replyId = await page.evaluate(
    (
      {
        selectors,
        targetCommentId,
        contentLike,
      }: { selectors: typeof XHH_SELECTORS; targetCommentId: string; contentLike: string },
    ) => {
      const comments = [...document.querySelectorAll<HTMLElement>(selectors.detail.commentItem.join(', '))];
      const target = comments.find((node) => {
        const id =
          node.getAttribute('data-comment-id') ||
          node.getAttribute('comment-id') ||
          node.id ||
          '';
        if (id === targetCommentId) return true;
        return node.outerHTML.includes(targetCommentId);
      });
      if (!target) return '';

      const replies = [...target.querySelectorAll<HTMLElement>(selectors.detail.subCommentItem.join(', '))];
      const hit = replies.find((node) => node.textContent?.includes(contentLike));
      if (!hit) return '';
      return (
        hit.getAttribute('data-comment-id') ||
        hit.getAttribute('comment-id') ||
        hit.id ||
        ''
      );
    },
    { selectors: XHH_SELECTORS, targetCommentId: commentId, contentLike },
  );

  return {
    success: true,
    ...(replyId ? { reply_id: replyId as string } : {}),
  };
}
|
||||
|
||||
/**
 * Writes `content` into the first usable comment input on the page.
 *
 * Each selector in `XHH_SELECTORS.detail.commentInput` is tried in order. A
 * matched element is clicked (click failures are ignored), then its value is
 * set inside the page and a bubbling `input` event is dispatched.
 *
 * @param page - Browser page holding the comment form.
 * @param content - Text to place into the input.
 * @returns `true` once one selector was filled; `false` when none could be.
 */
async function fillCommentInput(page: Page, content: string): Promise<boolean> {
  for (const selector of XHH_SELECTORS.detail.commentInput) {
    const handle = await page.$(selector).catch(() => null);
    if (handle) {
      // Best-effort focus; a failed click does not stop the attempt.
      await handle.click().catch(() => {});
      await page.waitForTimeout(200);

      let filled: boolean;
      try {
        filled = await page.evaluate(
          ({ selector: css, content: value }) => {
            const el = document.querySelector(css);
            if (el instanceof HTMLTextAreaElement || el instanceof HTMLInputElement) {
              el.value = value;
            } else if (el instanceof HTMLElement && el.isContentEditable) {
              el.focus();
              el.textContent = value;
            } else {
              // Missing element, or an element that cannot take text.
              return false;
            }
            el.dispatchEvent(new Event('input', { bubbles: true }));
            return true;
          },
          { selector, content },
        );
      } catch {
        // Any in-page failure counts as "not filled"; move on to the next selector.
        filled = false;
      }

      if (filled) return true;
    }
  }

  return false;
}
|
||||
|
||||
/**
 * Clicks the first selector whose first match accepts a click within 2 seconds.
 *
 * @param page - Browser page to click on.
 * @param selectors - Candidate selectors, tried in order.
 * @returns `true` after the first successful click; `false` (after logging a
 *          warning) when every candidate failed.
 */
async function clickFirstVisible(page: Page, selectors: readonly string[]): Promise<boolean> {
  for (const candidate of selectors) {
    const target = page.locator(candidate).first();
    try {
      await target.click({ timeout: 2_000 });
      return true;
    } catch {
      // This candidate could not be clicked in time; try the next one.
    }
  }

  log.warn({ selectors }, 'no clickable submit button');
  return false;
}
|
||||
@@ -0,0 +1,66 @@
|
||||
/**
 * Decoded form of a keyset pagination cursor.
 */
export interface KeysetCursorPayload {
  /** Key of the last item on the previous page; the next page starts after it. */
  key: string;
}
|
||||
|
||||
/**
 * One page of results produced by keyset pagination.
 */
export interface KeysetPage<T> {
  /** Items that belong to this page. */
  items: T[];
  /** Whether more items exist after this page. */
  hasMore: boolean;
  /** Opaque cursor for the following page; omitted when there is none to offer. */
  nextCursor?: string;
}
|
||||
|
||||
/**
 * Serializes a cursor payload into an opaque string.
 *
 * @param payload - Cursor payload to encode.
 * @returns The payload as JSON, UTF-8 encoded, then base64url encoded.
 */
export function encodeKeysetCursor(payload: KeysetCursorPayload): string {
  const json = JSON.stringify(payload);
  const bytes = Buffer.from(json, 'utf8');
  return bytes.toString('base64url');
}
|
||||
|
||||
/**
 * Parses an opaque cursor string back into its payload.
 *
 * @param cursor - base64url string produced by the encoder; may be omitted.
 * @returns `undefined` when `cursor` is missing or empty, otherwise the payload.
 * @throws Error('Invalid cursor for keyset pagination') when the string does not
 *         decode to JSON carrying a non-empty string `key`.
 */
export function decodeKeysetCursor(cursor?: string): KeysetCursorPayload | undefined {
  if (!cursor) return undefined;

  try {
    const decoded: unknown = JSON.parse(Buffer.from(cursor, 'base64url').toString('utf8'));
    // Destructuring null throws, which lands in the catch below like any other bad input.
    const { key } = decoded as { key?: unknown };
    if (typeof key === 'string' && key.length > 0) {
      return { key };
    }
    throw new Error('Invalid keyset cursor payload');
  } catch {
    // Every failure mode is reported to callers with this single message.
    throw new Error('Invalid cursor for keyset pagination');
  }
}
|
||||
|
||||
/**
 * Slices an in-memory list into one page, positioned by a keyset cursor.
 *
 * The page starts right after the first item whose key equals `cursor.key`.
 * When no item has that key, or no cursor is given, the page starts at the
 * beginning of the list.
 *
 * @param items - Full ordered list to paginate.
 * @param maxCount - Maximum page size; values `<= 0` yield an empty page with `hasMore: false`.
 * @param cursor - Decoded cursor from the previous page, if any.
 * @param keyOf - Returns the pagination key of an item.
 * @returns The page; `nextCursor` is set only when more items remain and the page is non-empty.
 */
export function paginateByKeyset<T>(
  items: T[],
  maxCount: number,
  cursor: KeysetCursorPayload | undefined,
  keyOf: (item: T) => string,
): KeysetPage<T> {
  if (maxCount <= 0) {
    return { items: [], hasMore: false };
  }

  let start = 0;
  if (cursor) {
    // findIndex returns -1 for an unknown key, which makes the start index 0.
    const anchor = items.findIndex((item) => keyOf(item) === cursor.key);
    start = Math.max(0, anchor + 1);
  }

  const pageItems = items.slice(start, start + maxCount);
  const hasMore = start + pageItems.length < items.length;

  const page: KeysetPage<T> = { items: pageItems, hasMore };
  if (hasMore && pageItems.length > 0) {
    const lastItem = pageItems[pageItems.length - 1]!;
    page.nextCursor = encodeKeysetCursor({ key: keyOf(lastItem) });
  }
  return page;
}
|
||||
@@ -0,0 +1,110 @@
|
||||
import type { Feed } from './types.js';
|
||||
|
||||
/**
 * Converts a displayed counter into a number.
 *
 * Accepted forms:
 * - finite numbers are returned as-is; non-finite numbers become 0;
 * - plain digits, with optional thousands commas (`"1,234"`);
 * - a number followed by a unit and an optional trailing `+`:
 *   `万` / `w` (x10,000), `k` (x1,000), `亿` (x100,000,000), case-insensitive,
 *   e.g. `"1.2万"`, `"1.5w"`, `"2.3K"`, `"1.2万+"`;
 * - anything else falls back to `parseInt`, so `"99+"` is 99 and `"abc"` is 0.
 *
 * @param raw - Counter as text or number; `null`/`undefined` count as empty.
 * @returns The parsed count, rounded to an integer for unit-scaled values; 0 when unparseable.
 */
export function parseCountString(raw: string | number | null | undefined): number {
  if (typeof raw === 'number') {
    return Number.isFinite(raw) ? raw : 0;
  }

  const text = (raw ?? '').toString().trim().replace(/,/g, '');
  if (!text) return 0;

  // Strict "<number><unit>[+]" form. Without this, inputs such as "1.5w" or
  // "1.2万+" would reach parseInt below and collapse to their leading integer.
  const scaled = /^([+-]?(?:\d+\.?\d*|\.\d+))\s*([万wk亿])\+?$/i.exec(text);
  if (scaled) {
    const unit = (scaled[2] ?? '').toLowerCase();
    const factor = unit === '亿' ? 100_000_000 : unit === 'k' ? 1_000 : 10_000;
    return Math.round(Number.parseFloat(scaled[1] ?? '') * factor);
  }

  // Lenient legacy path: anything ending in 万 is parsed by its numeric prefix.
  if (text.endsWith('万')) {
    const num = Number.parseFloat(text.slice(0, -1));
    if (Number.isNaN(num)) return 0;
    return Math.round(num * 10_000);
  }

  const intNum = Number.parseInt(text, 10);
  return Number.isNaN(intNum) ? 0 : intNum;
}
|
||||
|
||||
/**
 * Reports whether a piece of page text mentions a captcha.
 *
 * The check is a case-insensitive substring search for a fixed list of markers.
 *
 * @param text - Text to inspect, e.g. the text content of the page body.
 * @returns `true` when any marker occurs in `text`.
 */
export function detectCaptchaText(text: string): boolean {
  const lowered = text.toLowerCase();
  const markers = ['captcha', 'show_captcha', '验证码', 'tencentcaptcha'];
  return markers.some((marker) => lowered.includes(marker));
}
|
||||
|
||||
/**
 * Extracts a post (link) id from a URL-like string.
 *
 * Input may be an absolute http(s) URL, a site-relative path starting with `/`
 * (resolved against www.xiaoheihe.cn), or a scheme-less host/path (prefixed
 * with `https://`). The id is read from a `/app/bbs/link/<digits>` path first,
 * then from the `link_id` or `linkid` query parameter.
 *
 * @param rawUrl - URL, path, or host/path text.
 * @returns The id, or `undefined` when the input is blank, unparseable, or carries no id.
 */
export function extractLinkIdFromUrl(rawUrl: string): string | undefined {
  const input = rawUrl.trim();
  if (input === '') return undefined;

  let absolute: string;
  if (/^https?:\/\//i.test(input)) {
    absolute = input;
  } else if (input.startsWith('/')) {
    absolute = `https://www.xiaoheihe.cn${input}`;
  } else {
    absolute = `https://${input}`;
  }

  let parsed: URL;
  try {
    parsed = new URL(absolute);
  } catch {
    return undefined;
  }

  const idFromPath = /\/app\/bbs\/link\/(\d+)/.exec(parsed.pathname)?.[1];
  if (idFromPath) return idFromPath;

  const idFromQuery = parsed.searchParams.get('link_id') ?? parsed.searchParams.get('linkid');
  return idFromQuery || undefined;
}
|
||||
|
||||
/**
 * Extracts a user id from a URL-like string.
 *
 * Input may be an absolute http(s) URL, a site-relative path starting with `/`
 * (resolved against www.xiaoheihe.cn), or a scheme-less host/path (prefixed
 * with `https://`). The id is read from a `/app/user/profile/<digits>` path
 * first, then from the `userid` or `user_id` query parameter.
 *
 * @param rawUrl - URL, path, or host/path text.
 * @returns The id, or `undefined` when the input is blank, unparseable, or carries no id.
 */
export function extractUserIdFromUrl(rawUrl: string): string | undefined {
  const candidate = rawUrl.trim();
  if (candidate.length === 0) return undefined;

  const hasScheme = /^https?:\/\//i.test(candidate);
  const isSitePath = candidate.startsWith('/');

  try {
    let url: URL;
    if (hasScheme) {
      url = new URL(candidate);
    } else if (isSitePath) {
      url = new URL(`https://www.xiaoheihe.cn${candidate}`);
    } else {
      url = new URL(`https://${candidate}`);
    }

    const profileMatch = url.pathname.match(/\/app\/user\/profile\/(\d+)/);
    const idFromPath = profileMatch?.[1];
    if (idFromPath) return idFromPath;

    const idFromQuery = url.searchParams.get('userid') ?? url.searchParams.get('user_id');
    return idFromQuery || undefined;
  } catch {
    return undefined;
  }
}
|
||||
|
||||
/**
 * Builds skeleton feed entries from raw HTML by collecting post links.
 *
 * Only double-quoted `href="/app/bbs/link/<digits>"` attributes are recognised.
 * Each distinct link id yields one feed, in order of first appearance; all
 * fields other than `id` and `linkUrl` are left empty or zero.
 *
 * @param html - HTML text to scan.
 * @returns One placeholder feed per distinct link id found.
 */
export function parseFeedsFromHtmlSnapshot(html: string): Feed[] {
  const hrefPattern = /href="(\/app\/bbs\/link\/\d+)"/g;
  // Map keeps insertion order, so the result stays in order of first appearance.
  const feedsById = new Map<string, Feed>();

  for (const [, href] of html.matchAll(hrefPattern)) {
    if (!href) continue;

    const id = extractLinkIdFromUrl(href);
    if (!id || feedsById.has(id)) continue;

    feedsById.set(id, {
      id,
      title: '',
      description: '',
      coverUrl: '',
      likeCount: 0,
      commentCount: 0,
      user: {
        id: '',
        nickname: '',
        avatar: '',
      },
      linkUrl: `https://www.xiaoheihe.cn${href}`,
    });
  }

  return [...feedsById.values()];
}
|
||||
|
||||
/**
 * Returns the first argument that still has content after trimming.
 *
 * @param values - Candidates in priority order; `null` and `undefined` are skipped.
 * @returns The trimmed first non-blank candidate, or `''` when there is none.
 */
export function firstNonEmpty(...values: Array<string | null | undefined>): string {
  const winner = values.find((candidate) => Boolean(candidate?.trim()));
  return winner?.trim() ?? '';
}
|
||||
@@ -0,0 +1,266 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '@social/core/utils/logger.js';
|
||||
import { XHH_SELECTORS } from './selectors.js';
|
||||
import type { Comment, FeedDetail } from './types.js';
|
||||
import { detectCaptchaText, firstNonEmpty, parseCountString } from './extractors.js';
|
||||
|
||||
// Module-level logger derived through logger.child with the binding module: 'xhh-feed-detail'.
const log = logger.child({ module: 'xhh-feed-detail' });
|
||||
|
||||
/**
 * Returns the detail page URL of a post.
 *
 * @param linkId - Post (link) identifier; percent-encoded before use in the path.
 * @returns The absolute detail page URL.
 */
function buildDetailUrl(linkId: string): string {
  const pathSegment = encodeURIComponent(linkId);
  return `https://www.xiaoheihe.cn/app/bbs/link/${pathSegment}`;
}
|
||||
|
||||
/**
 * Comment data as scraped from the detail page DOM, before normalization.
 * Text fields hold trimmed element text; missing elements yield `''`.
 */
interface RawComment {
  /** Value of `data-comment-id`, `comment-id`, or the element id; `''` when none is present. */
  id: string;
  /** Id of the parent comment; set on replies only. */
  parentId?: string;
  /** Digits captured from the author's `/app/user/profile/<id>` link; `''` when absent. */
  userId: string;
  /** Author display name. */
  nickname: string;
  /** Author avatar image source. */
  avatar: string;
  /** Comment body text. */
  content: string;
  /** Like counter as displayed; converted to a number during normalization. */
  likeCount: string | number;
  /** Creation time exactly as displayed on the page. */
  createTime: string;
  /** Replies nested under this comment. */
  subComments: RawComment[];
}
|
||||
|
||||
/**
 * Post detail data as scraped from the detail page DOM, before normalization.
 */
interface RawDetail {
  /** Post title text. */
  title: string;
  /** Post body / description text. */
  description: string;
  /** Image sources found in the post. */
  images: string[];
  /** Like counter as displayed. */
  likeCount: string | number;
  /** Favorite counter as displayed. */
  favoriteCount: string | number;
  /** Comment counter as displayed. */
  commentCount: string | number;
  /** Whether the like control looks active. */
  isLiked: boolean;
  /** Whether the favorite control looks active. */
  isFavorited: boolean;
  /** Author id captured from the profile link; `''` when absent. */
  userId: string;
  /** Author display name. */
  nickname: string;
  /** Author avatar image source. */
  avatar: string;
  /** Top-level comments, each with its nested replies. */
  comments: RawComment[];
}
|
||||
|
||||
/**
 * Loads a post's detail page and scrapes its content, counters, author and comments.
 *
 * All DOM reading happens in a single in-page evaluation driven by
 * `XHH_SELECTORS.detail`; counters are converted to numbers and comments are
 * normalized afterwards.
 *
 * @param page - Browser page used for navigation and DOM access.
 * @param linkId - Id of the post to load.
 * @returns The normalized post detail.
 * @throws Error with a `CAPTCHA_REQUIRED:` prefix when captcha text is detected.
 * @throws Error('waiting for selector: xhh detail not found') when neither a
 *         title nor a description could be read.
 */
export async function getFeedDetail(page: Page, linkId: string): Promise<FeedDetail> {
  await page.goto(buildDetailUrl(linkId), { waitUntil: 'domcontentloaded' });
  // Fixed pause after navigation before the DOM is read.
  await page.waitForTimeout(1_200);

  const bodyText = await page.textContent('body').catch(() => '');
  if (bodyText && detectCaptchaText(bodyText)) {
    throw new Error('CAPTCHA_REQUIRED: captcha detected on feed detail');
  }

  // Everything inside this callback runs in the page, so its helpers must be
  // defined inside it and may only use the `selectors` argument.
  const raw = (await page.evaluate((selectors: typeof XHH_SELECTORS) => {
    const detailSelectors = selectors.detail;
    const profileIdPattern = /\/app\/user\/profile\/(\d+)/;

    // Trimmed text of the first match under `scope`, or ''.
    const textOf = (scope: ParentNode, selector: string): string =>
      (scope.querySelector(selector)?.textContent ?? '').trim();

    // First non-empty document-level text among several selectors.
    const firstText = (selectorList: readonly string[]): string => {
      for (const selector of selectorList) {
        const found = textOf(document, selector);
        if (found) return found;
      }
      return '';
    };

    // Trimmed attribute value of the first document-level match, or ''.
    const attrOf = (selector: string, attr: string): string =>
      (document.querySelector(selector)?.getAttribute(attr) ?? '').trim();

    // First element matched by any selector in the list.
    const firstElement = (selectorList: readonly string[]): Element | undefined =>
      selectorList
        .map((sel: string) => document.querySelector(sel))
        .find(Boolean) as Element | undefined;

    // A control counts as active when aria-pressed is "true", its class
    // contains "active", or its markup contains "filled".
    const looksActive = (button: Element | undefined): boolean =>
      Boolean(button?.getAttribute('aria-pressed') === 'true') ||
      Boolean(button?.className.toString().toLowerCase().includes('active')) ||
      Boolean(button?.innerHTML.toLowerCase().includes('filled'));

    // Reads one comment (or reply) element, without its replies.
    const readComment = (node: Element): RawComment => {
      const id =
        node.getAttribute('data-comment-id') ||
        node.getAttribute('comment-id') ||
        node.id ||
        '';
      const authorNode = node.querySelector(detailSelectors.commentAuthor);
      const authorLink = authorNode?.getAttribute('href') ?? '';
      const userId = authorLink.match(profileIdPattern)?.[1] ?? '';
      const nickname = (authorNode?.textContent ?? '').trim();
      const avatar = (node.querySelector(detailSelectors.commentAvatar) as HTMLImageElement | null)?.src ?? '';
      const content = textOf(node, detailSelectors.commentContent);
      const createTime = textOf(node, detailSelectors.commentTime);
      const likeCount = textOf(node, detailSelectors.commentLikeCount);
      return { id, userId, nickname, avatar, content, likeCount, createTime, subComments: [] };
    };

    const title = firstText(detailSelectors.title);
    const description = firstText(detailSelectors.description);
    const images = [...document.querySelectorAll<HTMLImageElement>(detailSelectors.image)]
      .map((img) => img.src)
      .filter(Boolean);
    const likeCount = textOf(document, detailSelectors.likeCount);
    const favoriteCount = textOf(document, detailSelectors.favoriteCount);
    const commentCount = textOf(document, detailSelectors.commentCount);

    const isLiked = looksActive(firstElement(detailSelectors.likeButton));
    const isFavorited = looksActive(firstElement(detailSelectors.favoriteButton));

    const userLink = attrOf(detailSelectors.userLink, 'href');
    const nickname = firstText(detailSelectors.userName);
    const avatar = attrOf(detailSelectors.userAvatar, 'src');

    const commentSelector = detailSelectors.commentItem.join(', ');
    const subSelector = detailSelectors.subCommentItem.join(', ');
    const comments: RawComment[] = [];

    for (const node of document.querySelectorAll<HTMLElement>(commentSelector)) {
      const parent = readComment(node);
      for (const subNode of node.querySelectorAll<HTMLElement>(subSelector)) {
        const reply = readComment(subNode);
        // Replies point back at their parent; an empty parent id is recorded as undefined.
        reply.parentId = parent.id || undefined;
        parent.subComments.push(reply);
      }
      comments.push(parent);
    }

    return {
      title,
      description,
      images,
      likeCount,
      favoriteCount,
      commentCount,
      isLiked,
      isFavorited,
      userId: userLink.match(profileIdPattern)?.[1] ?? '',
      nickname,
      avatar,
      comments,
    };
  }, XHH_SELECTORS)) as RawDetail;

  const author = {
    id: raw.userId,
    nickname: raw.nickname,
    avatar: raw.avatar,
  };

  const detail: FeedDetail = {
    id: linkId,
    title: raw.title,
    description: raw.description,
    images: raw.images,
    likeCount: parseCountString(raw.likeCount),
    favoriteCount: parseCountString(raw.favoriteCount),
    commentCount: parseCountString(raw.commentCount),
    isLiked: raw.isLiked,
    isFavorited: raw.isFavorited,
    user: author,
    comments: raw.comments.map(normalizeComment),
  };

  // With neither a title nor a description, treat the post as not found.
  const hasContent = Boolean(detail.title) || Boolean(detail.description);
  if (!hasContent) {
    throw new Error('waiting for selector: xhh detail not found');
  }

  log.info({ linkId, commentCount: detail.comments.length }, 'xhh feed detail extracted');
  return detail;
}
|
||||
|
||||
/**
 * Loads the replies of one comment on a post's detail page.
 *
 * Flow: navigate, abort on captcha text, expand the replies of the target
 * comment, then scrape the reply nodes nested inside it.
 *
 * Expansion first looks for the expand-replies control inside the target
 * comment, so the right thread is opened. If that fails, it falls back to
 * clicking the first such control on the page, bounded to 2 seconds so a page
 * without the control does not stall the call.
 *
 * @param page - Browser page used for navigation and DOM access.
 * @param linkId - Id of the post that holds the comment.
 * @param commentId - Id of the comment whose replies are wanted.
 * @param maxCount - Maximum number of replies to return (default 200); values below 0 are treated as 0.
 * @returns Up to `maxCount` normalized replies; empty when the comment is not found.
 * @throws Error with a `CAPTCHA_REQUIRED:` prefix when captcha text is detected.
 */
export async function getSubComments(
  page: Page,
  linkId: string,
  commentId: string,
  maxCount = 200,
): Promise<Comment[]> {
  const url = buildDetailUrl(linkId);
  await page.goto(url, { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(1_000);

  const text = await page.textContent('body').catch(() => '');
  if (text && detectCaptchaText(text)) {
    throw new Error('CAPTCHA_REQUIRED: captcha detected on sub-comments page');
  }

  const expandSelector = XHH_SELECTORS.detail.commentExpandReplies;

  // NOTE(review): assumes commentExpandReplies is plain CSS. If it uses
  // Playwright-only syntax the in-page lookup throws, the catch yields false,
  // and the page-level fallback below is used instead.
  const expandedInTarget = await page
    .evaluate(
      ({ selectors, targetCommentId, expandCss }) => {
        const comments = [...document.querySelectorAll<HTMLElement>(selectors.detail.commentItem.join(', '))];
        const target = comments.find((node) => {
          const id =
            node.getAttribute('data-comment-id') ||
            node.getAttribute('comment-id') ||
            node.id ||
            '';
          if (id === targetCommentId) return true;
          return node.outerHTML.includes(targetCommentId);
        });
        const toggle = target?.querySelector<HTMLElement>(expandCss) ?? null;
        if (!toggle) return false;
        toggle.click();
        return true;
      },
      { selectors: XHH_SELECTORS, targetCommentId: commentId, expandCss: expandSelector },
    )
    .catch(() => false);

  if (!expandedInTarget) {
    // Best-effort fallback; a missing control is not an error.
    await page.locator(expandSelector).first().click({ timeout: 2_000 }).catch(() => {});
  }
  await page.waitForTimeout(500);

  const subComments = await page.evaluate(
    ({ selectors, targetCommentId }) => {
      const commentSelector = selectors.detail.commentItem.join(', ');
      const subSelector = selectors.detail.subCommentItem.join(', ');

      const comments = [...document.querySelectorAll<HTMLElement>(commentSelector)];
      const target = comments.find((node) => {
        const id =
          node.getAttribute('data-comment-id') ||
          node.getAttribute('comment-id') ||
          node.id ||
          '';
        if (id === targetCommentId) return true;
        // Loose fallback: the id appears anywhere in the node's markup.
        return node.outerHTML.includes(targetCommentId);
      });
      if (!target) return [] as RawComment[];

      const out: RawComment[] = [];
      for (const node of target.querySelectorAll<HTMLElement>(subSelector)) {
        const authorNode = node.querySelector(selectors.detail.commentAuthor);
        const authorLink = authorNode?.getAttribute('href') ?? '';
        out.push({
          id:
            node.getAttribute('data-comment-id') ||
            node.getAttribute('comment-id') ||
            node.id ||
            '',
          parentId: targetCommentId,
          userId: authorLink.match(/\/app\/user\/profile\/(\d+)/)?.[1] ?? '',
          nickname: (authorNode?.textContent ?? '').trim(),
          avatar: (node.querySelector(selectors.detail.commentAvatar) as HTMLImageElement | null)?.src ?? '',
          content: (node.querySelector(selectors.detail.commentContent)?.textContent ?? '').trim(),
          createTime: (node.querySelector(selectors.detail.commentTime)?.textContent ?? '').trim(),
          likeCount: (node.querySelector(selectors.detail.commentLikeCount)?.textContent ?? '').trim(),
          subComments: [],
        });
      }
      return out;
    },
    { selectors: XHH_SELECTORS, targetCommentId: commentId },
  );

  // Clamp so a negative maxCount yields no items rather than "all but the last N".
  return subComments.slice(0, Math.max(0, maxCount)).map(normalizeComment);
}
|
||||
|
||||
/**
 * Converts a scraped comment into the public `Comment` shape, recursively.
 *
 * The like counter is parsed into a number. A comment without an id receives a
 * generated `<timestamp>-<random>` id, which differs on every call.
 *
 * @param input - Scraped comment.
 * @returns The normalized comment; `parentId` is included only when it is non-empty.
 */
function normalizeComment(input: RawComment): Comment {
  // Built unconditionally; only used when the scraped id is blank.
  const generatedId = `${Date.now()}-${Math.random()}`;
  const parentLink = input.parentId ? { parentId: input.parentId } : {};

  return {
    id: firstNonEmpty(input.id, generatedId),
    ...parentLink,
    userId: input.userId,
    nickname: input.nickname,
    avatar: input.avatar,
    content: input.content,
    likeCount: parseCountString(input.likeCount),
    createTime: input.createTime,
    subComments: input.subComments.map((reply) => normalizeComment(reply)),
  };
}
|
||||
@@ -0,0 +1,268 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '@social/core/utils/logger.js';
|
||||
import { XHH_SELECTORS } from './selectors.js';
|
||||
import type { Feed } from './types.js';
|
||||
import {
|
||||
detectCaptchaText,
|
||||
extractLinkIdFromUrl,
|
||||
firstNonEmpty,
|
||||
parseCountString,
|
||||
} from './extractors.js';
|
||||
|
||||
// Community home page; listFeeds navigates here before extracting feeds.
const HOME_URL = 'https://www.xiaoheihe.cn/app/bbs/home';
|
||||
// Module-level logger derived through logger.child with the binding module: 'xhh-feeds'.
const log = logger.child({ module: 'xhh-feeds' });
|
||||
|
||||
/**
 * Loosely-typed feed data gathered from page state or the DOM, before
 * de-duplication and normalization. Every field is optional because either
 * source may lack it.
 */
interface RawFeedCandidate {
  /** Post (link) id when the source exposes one directly. */
  id?: string;
  /** Post title. */
  title?: string;
  /** Post description or body excerpt. */
  description?: string;
  /** Cover image URL. */
  coverUrl?: string;
  /** Like counter as found (text or number). */
  likeCount?: string | number;
  /** Comment counter as found (text or number). */
  commentCount?: string | number;
  /** Author id, or a profile link from which the id is extracted later. */
  userId?: string;
  /** Author display name. */
  nickname?: string;
  /** Author avatar URL. */
  avatar?: string;
  /** Link to the post; may be absolute, site-relative, or scheme-less. */
  linkUrl?: string;
}
|
||||
|
||||
/**
 * Loads the community home page and returns the feeds found on it.
 *
 * Candidates are gathered from the page's Nuxt state first and from the DOM
 * second, then merged, de-duplicated and normalized.
 *
 * @param page - Browser page used for navigation and extraction.
 * @returns The normalized feeds.
 * @throws Error with a `CAPTCHA_REQUIRED:` prefix when captcha text is detected.
 */
export async function listFeeds(page: Page): Promise<Feed[]> {
  await page.goto(HOME_URL, { waitUntil: 'domcontentloaded' });
  // Fixed pause after navigation before the page is read.
  await page.waitForTimeout(1_500);

  const bodyText = await page.textContent('body').catch(() => '');
  if (bodyText && detectCaptchaText(bodyText)) {
    throw new Error('CAPTCHA_REQUIRED: captcha detected on feeds page');
  }

  // Nuxt candidates go first, so they win when the same id appears in both sources.
  const fromState = await extractFeedsFromNuxt(page);
  const fromDom = await extractFeedsFromDom(page);
  const feeds = dedupeAndNormalize(fromState.concat(fromDom));

  log.info({ count: feeds.length }, 'xhh feeds extracted');
  return feeds;
}
|
||||
|
||||
/**
 * Searches feeds by keyword.
 *
 * Opens the search page and extracts feeds from it. If any are found, the ones
 * whose title, description or author nickname contain the keyword
 * (case-insensitive) are returned. If the search page yields nothing, the home
 * feeds are loaded and filtered the same way.
 *
 * @param page - Browser page used for navigation and extraction.
 * @param keyword - Search keyword.
 * @returns Feeds matching the keyword.
 * @throws Error with a `CAPTCHA_REQUIRED:` prefix when captcha text is detected.
 */
export async function searchFeeds(page: Page, keyword: string): Promise<Feed[]> {
  const needle = keyword.toLowerCase();
  const matchesKeyword = (item: Feed): boolean =>
    `${item.title} ${item.description} ${item.user.nickname}`.toLowerCase().includes(needle);

  const targetUrl = `https://www.xiaoheihe.cn/app/bbs/search?keyword=${encodeURIComponent(keyword)}`;
  await page.goto(targetUrl, { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(1_200);

  const bodyText = await page.textContent('body').catch(() => '');
  if (bodyText && detectCaptchaText(bodyText)) {
    throw new Error('CAPTCHA_REQUIRED: captcha detected on search page');
  }

  const fromState = await extractFeedsFromNuxt(page);
  const fromDom = await extractFeedsFromDom(page);
  const searchResults = dedupeAndNormalize([...fromState, ...fromDom]);

  if (searchResults.length > 0) {
    return searchResults.filter(matchesKeyword);
  }

  // Fallback: when search route structure changes, use home feeds and filter.
  const homeFeeds = await listFeeds(page);
  return homeFeeds.filter(matchesKeyword);
}
|
||||
|
||||
/**
 * Collects feed candidates from the page's Nuxt state.
 *
 * Inside the page, `window.__NUXT_DATA__` (or `window.__NUXT__.data`) is walked
 * depth-first. Every plain object that has a string id-like field, a string
 * title-like field, or a URL containing `/app/bbs/link/` is collected, up to
 * 500 objects. The collected objects are then mapped onto `RawFeedCandidate`.
 *
 * Any failure of the in-page evaluation results in an empty list.
 *
 * @param page - Browser page whose state is read.
 * @returns Feed candidates; they may include objects that are not posts.
 */
async function extractFeedsFromNuxt(page: Page): Promise<RawFeedCandidate[]> {
  const harvested = await page
    .evaluate(() => {
      const root: unknown =
        (window as { __NUXT_DATA__?: unknown }).__NUXT_DATA__ ??
        (window as { __NUXT__?: { data?: unknown } }).__NUXT__?.data ??
        null;

      const collected: Array<Record<string, unknown>> = [];
      // Guards against cycles and repeated references in the state graph.
      const seen = new Set<unknown>();

      // First non-empty string found under any of the given keys, or ''.
      const firstString = (record: Record<string, unknown>, keys: string[]): string => {
        for (const key of keys) {
          const candidate = record[key];
          if (typeof candidate === 'string' && candidate) return candidate;
        }
        return '';
      };

      const visit = (value: unknown): void => {
        if (!value || typeof value !== 'object' || seen.has(value)) return;
        seen.add(value);

        if (Array.isArray(value)) {
          for (const entry of value) visit(entry);
          return;
        }

        const record = value as Record<string, unknown>;
        const id = firstString(record, ['link_id', 'linkid', 'id', 'post_id']);
        const url = firstString(record, ['link_url', 'url']);
        const title = firstString(record, ['title', 'subject']);

        if (id || url.includes('/app/bbs/link/') || title) {
          collected.push(record);
        }

        for (const child of Object.values(record)) {
          visit(child);
        }
      };

      visit(root);
      return collected.slice(0, 500);
    })
    .catch(() => []);

  return (harvested as Array<Record<string, unknown>>).map((item) => {
    const author = (item['user'] ?? item['author']) as Record<string, unknown> | undefined;
    // Field readers: string or number values become text, everything else ''.
    const field = (key: string): string => valueString(item[key]);
    const authorField = (key: string): string => valueString(author?.[key]);

    const linkUrl = firstNonEmpty(field('link_url'), field('url'), field('jump_url'));

    return {
      id: firstNonEmpty(field('link_id'), field('linkid'), field('post_id'), field('id')),
      title: firstNonEmpty(field('title'), field('subject')),
      description: firstNonEmpty(field('description'), field('content'), field('desc')),
      coverUrl: firstNonEmpty(field('cover'), field('cover_url'), field('image')),
      likeCount: field('like_count') || field('likes'),
      commentCount: field('comment_count') || field('comments'),
      userId: firstNonEmpty(authorField('userid'), authorField('user_id'), field('userid')),
      nickname: firstNonEmpty(authorField('nickname'), authorField('name'), field('nickname')),
      avatar: firstNonEmpty(authorField('avatar'), authorField('avatar_url')),
      linkUrl,
    };
  });
}
|
||||
|
||||
/**
 * Collects feed candidates from rendered post links in the DOM.
 *
 * For every anchor matching `XHH_SELECTORS.feed.link` that has an `href`, the
 * surrounding card (closest card selector match, else the anchor's parent) is
 * read for title, description, cover, author and counters.
 *
 * Any failure of the in-page evaluation results in an empty list.
 *
 * @param page - Browser page whose DOM is read.
 * @returns One candidate per matching anchor; duplicates are not removed here.
 */
async function extractFeedsFromDom(page: Page): Promise<RawFeedCandidate[]> {
  return page
    .evaluate((selectors) => {
      const feedSelectors = selectors.feed;

      // Trimmed text of the first match inside `scope`, or ''.
      const textIn = (scope: Element | null, selector: string): string =>
        (scope?.querySelector(selector)?.textContent ?? '').trim();
      // `src` of the first image match inside `scope`, or ''.
      const srcIn = (scope: Element | null, selector: string): string =>
        (scope?.querySelector(selector) as HTMLImageElement | null)?.src ?? '';

      const postLinks = [...document.querySelectorAll<HTMLAnchorElement>(feedSelectors.link)]
        .filter((a) => Boolean(a.getAttribute('href')));

      const cardSelector = feedSelectors.card.join(', ');
      const titleSelector = feedSelectors.title.join(', ');
      const descSelector = feedSelectors.description.join(', ');
      const userNameSelector = feedSelectors.userName.join(', ');
      const likeSelector = feedSelectors.likeCount.join(', ');
      const commentSelector = feedSelectors.commentCount.join(', ');

      return postLinks.map((link): RawFeedCandidate => {
        const href = link.getAttribute('href') ?? '';
        const card = link.closest(cardSelector) ?? link.parentElement;

        const title = textIn(card, titleSelector);
        const description = textIn(card, descSelector);
        const cover = srcIn(card, feedSelectors.cover);
        const userNode = card?.querySelector(feedSelectors.userLink) as HTMLAnchorElement | null;
        const username = textIn(card, userNameSelector);
        const likeCount = textIn(card, likeSelector);
        const commentCount = textIn(card, commentSelector);

        return {
          linkUrl: href,
          title,
          description,
          coverUrl: cover,
          // Raw profile href; the numeric id is extracted during normalization.
          userId: userNode?.getAttribute('href') ?? '',
          nickname: username,
          // NOTE(review): this takes the first <img> in the card, which may be the cover rather than the avatar — confirm.
          avatar: srcIn(card, 'img'),
          likeCount,
          commentCount,
        };
      });
    }, XHH_SELECTORS)
    .catch(() => []);
}
|
||||
|
||||
/**
 * Turns raw feed candidates into normalized feeds, keeping one entry per link id.
 *
 * The link id is the candidate's `id` when non-blank, otherwise the id parsed
 * from its `linkUrl`. Candidates without an id are dropped; for repeated ids
 * the first candidate wins. Output order follows first appearance.
 *
 * @param items - Candidates in priority order.
 * @returns Normalized, de-duplicated feeds.
 */
function dedupeAndNormalize(items: RawFeedCandidate[]): Feed[] {
  // Map keeps insertion order, so the result stays in order of first appearance.
  const feedsById = new Map<string, Feed>();

  for (const candidate of items) {
    const idFromUrl = candidate.linkUrl ? extractLinkIdFromUrl(candidate.linkUrl) ?? '' : '';
    const linkId = firstNonEmpty(candidate.id, idFromUrl);
    if (linkId === '' || feedsById.has(linkId)) continue;

    const linkUrl = normalizeLinkUrl(candidate.linkUrl, linkId);
    // `userId` may hold a profile href; only a numeric profile id is kept.
    const authorId = candidate.userId ? extractUserIdFromMaybeHref(candidate.userId) : '';

    feedsById.set(linkId, {
      id: linkId,
      title: candidate.title?.trim() ?? '',
      description: candidate.description?.trim() ?? '',
      coverUrl: candidate.coverUrl?.trim() ?? '',
      likeCount: parseCountString(candidate.likeCount),
      commentCount: parseCountString(candidate.commentCount),
      user: {
        id: authorId,
        nickname: candidate.nickname?.trim() ?? '',
        avatar: candidate.avatar?.trim() ?? '',
      },
      linkUrl,
    });
  }

  return Array.from(feedsById.values());
}
|
||||
|
||||
/**
 * Turns a scraped post href into an absolute URL.
 *
 * @param rawUrl Href as scraped; may be absolute, protocol-relative (`//host/...`),
 *   root-relative (`/path`), scheme-less (`host/path`), blank or undefined.
 * @param linkId Post id used to build the canonical detail URL when `rawUrl` is blank.
 * @returns An absolute `http(s)` URL.
 */
function normalizeLinkUrl(rawUrl: string | undefined, linkId: string): string {
  const trimmed = rawUrl?.trim() ?? '';
  if (!trimmed) return `https://www.xiaoheihe.cn/app/bbs/link/${linkId}`;
  if (/^https?:\/\//i.test(trimmed)) return trimmed;
  // Protocol-relative URLs also start with '/', so they must be handled before
  // the root-relative branch or the host would be appended as a path.
  if (trimmed.startsWith('//')) return `https:${trimmed}`;
  if (trimmed.startsWith('/')) return `https://www.xiaoheihe.cn${trimmed}`;
  // Anything else is treated as a URL that is only missing its scheme.
  return `https://${trimmed}`;
}
|
||||
|
||||
/**
 * Pulls the numeric user id out of a profile href.
 *
 * @param raw Absolute URL or root-relative path scraped from a user link.
 * @returns The digits following `/app/user/profile/`, or `''` when the
 *   pattern is not present.
 */
function extractUserIdFromMaybeHref(raw: string): string {
  // Root-relative paths are resolved against the site origin before matching.
  const origin = raw.startsWith('/') ? 'https://www.xiaoheihe.cn' : '';
  const found = /\/app\/user\/profile\/(\d+)/.exec(`${origin}${raw}`);
  return found?.[1] ?? '';
}
|
||||
|
||||
/**
 * Coerces a loosely typed value to a string.
 *
 * @param value Any value.
 * @returns The value itself for strings, `String(value)` for numbers, and
 *   `''` for everything else (including `null` and `undefined`).
 */
function valueString(value: unknown): string {
  switch (typeof value) {
    case 'string':
      return value;
    case 'number':
      return String(value);
    default:
      return '';
  }
}
|
||||
|
||||
@@ -0,0 +1,389 @@
|
||||
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
||||
import type { Router } from 'express';
|
||||
|
||||
import type { BrowserManager } from '@social/core/browser/manager.js';
|
||||
import { config } from '@social/core/config/index.js';
|
||||
import type { PlatformPlugin } from '@social/core/server/app.js';
|
||||
import { withErrorHandling, type McpToolResult } from '@social/core/utils/errors.js';
|
||||
import { computeIdempotencyHash, getIdempotencyStore } from '@social/core/utils/idempotency.js';
|
||||
import { deleteCookies, checkLoginStatus, getLoginQRCode } from './login.js';
|
||||
import { listFeeds } from './feeds.js';
|
||||
import { searchFeeds } from './search.js';
|
||||
import { getFeedDetail, getSubComments } from './feed-detail.js';
|
||||
import { getUserProfile } from './user-profile.js';
|
||||
import { listMyPosts } from './my-posts.js';
|
||||
import { postComment, replyComment } from './comment.js';
|
||||
import { setFavoriteState, setLikeState } from './interaction.js';
|
||||
import { resolveFeedTarget, resolveUserTarget } from './target-resolver.js';
|
||||
import {
|
||||
CheckLoginSchema,
|
||||
DeleteCookiesSchema,
|
||||
GetFeedDetailSchema,
|
||||
GetLoginQRCodeSchema,
|
||||
GetSubCommentsSchema,
|
||||
GetUserProfileSchema,
|
||||
ListFeedsSchema,
|
||||
ListMyPostsSchema,
|
||||
PostCommentSchema,
|
||||
ReplyCommentSchema,
|
||||
SearchSchema,
|
||||
SetFavoriteStateSchema,
|
||||
SetLikeStateSchema,
|
||||
} from './schemas.js';
|
||||
import { createXhhRoutes } from './routes.js';
|
||||
import { decodeKeysetCursor, paginateByKeyset } from './cursor.js';
|
||||
import type { Comment } from './types.js';
|
||||
|
||||
/** Platform key: used as the plugin name and as the first argument to `browser.withPage`. */
const PLATFORM = 'xiaoheihe';
|
||||
/** Page size applied by `clampPageSize` when the caller omits `max_count`. */
const DEFAULT_PAGE_SIZE = 20;
|
||||
/** Upper bound enforced by `clampPageSize`; also the count passed to `getSubComments`. */
const MAX_PAGE_SIZE = 200;
|
||||
|
||||
/** Free-form metadata object placed under `meta` in tool responses built by `ok`. */
type McpMeta = Record<string, unknown>;
|
||||
|
||||
/**
 * Builds a successful MCP tool result.
 *
 * @param data Payload placed under `data` in the JSON envelope.
 * @param meta Optional metadata; an empty object is emitted when omitted.
 * @returns A result whose single text content item is the JSON string
 *   `{ success: true, data, meta }`.
 */
function ok(data: unknown, meta?: McpMeta): McpToolResult {
  const envelope = { success: true, data, meta: meta ?? {} };
  return {
    content: [{ type: 'text', text: JSON.stringify(envelope) }],
  };
}
|
||||
|
||||
/**
 * Resolves the effective page size for a paginated tool call.
 *
 * @param maxCount Requested page size; `DEFAULT_PAGE_SIZE` is used when it is
 *   omitted or `NaN`.
 * @returns A whole number between 1 and `MAX_PAGE_SIZE` inclusive. Fractional
 *   requests are truncated toward zero before clamping.
 */
function clampPageSize(maxCount?: number): number {
  const requested = maxCount ?? DEFAULT_PAGE_SIZE;
  // NaN would otherwise propagate through Math.min/Math.max and reach the paginator.
  if (Number.isNaN(requested)) return DEFAULT_PAGE_SIZE;
  return Math.min(MAX_PAGE_SIZE, Math.max(1, Math.trunc(requested)));
}
|
||||
|
||||
/**
 * Builds the `pagination` metadata attached to paginated tool responses.
 *
 * @param cursor Cursor string supplied by the caller; reported as `''` when absent.
 * @param maxCount Effective page size used for this response.
 * @param returned Number of items actually returned.
 * @param nextCursor Cursor for the following page; the `next_cursor` key is
 *   omitted when this is empty or undefined.
 * @returns `{ pagination: { mode: 'keyset', cursor, max_count, returned, next_cursor? } }`.
 */
function paginationMeta(
  cursor: string | undefined,
  maxCount: number,
  returned: number,
  nextCursor?: string,
): McpMeta {
  const pagination: Record<string, unknown> = {
    mode: 'keyset',
    cursor: cursor ?? '',
    max_count: maxCount,
    returned,
  };
  if (nextCursor) {
    pagination['next_cursor'] = nextCursor;
  }
  return { pagination };
}
|
||||
|
||||
/**
 * Runs a write operation at most once per `(toolName, requestId)` pair.
 *
 * Without a `requestId` the operation simply runs. With one, a previously
 * stored result is replayed when the input hash matches, and a reused id with
 * a different input hash is rejected.
 *
 * @param toolName Namespace for the idempotency key.
 * @param requestId Caller-supplied key; falsy disables idempotency handling.
 * @param inputForHash Value hashed to detect reuse of an id with different input.
 * @param execute The operation to run.
 * @returns The operation result, plus replay metadata when a `requestId` was given.
 * @throws Error when `requestId` was already used with different parameters.
 */
async function runWithIdempotency<T>(
  toolName: string,
  requestId: string | undefined,
  inputForHash: unknown,
  execute: () => Promise<T>,
): Promise<{ data: T; meta?: McpMeta }> {
  if (!requestId) {
    const data = await execute();
    return { data };
  }

  const idempotencyStore = getIdempotencyStore();
  const fingerprint = computeIdempotencyHash(inputForHash);
  const prior = idempotencyStore.get(toolName, requestId);

  if (!prior) {
    // First time this id is seen: run, then record the outcome for later replays.
    const fresh = await execute();
    idempotencyStore.put(toolName, requestId, fingerprint, fresh);
    return {
      data: fresh,
      meta: { request_id: requestId, idempotent_replay: false },
    };
  }

  if (prior.inputHash !== fingerprint) {
    throw new Error('request_id already used with different parameters');
  }

  return {
    data: prior.responseData as T,
    meta: {
      request_id: requestId,
      idempotent_replay: true,
      first_processed_at: prior.createdAt,
    },
  };
}
|
||||
|
||||
/**
 * Sort comparator for comments: orders by `createTime`, then by `id` as a
 * tie-breaker, both compared with `String.prototype.localeCompare`.
 *
 * @returns A negative, zero or positive number suitable for `Array.prototype.sort`.
 */
function compareCommentKey(a: Comment, b: Comment): number {
  // `||` falls through to the id comparison only when the times compare equal (0).
  return a.createTime.localeCompare(b.createTime) || a.id.localeCompare(b.id);
}
|
||||
|
||||
/**
 * Xiaoheihe platform plugin: mounts the REST routes built by `createXhhRoutes`
 * and registers the `xhh_*` MCP tools.
 *
 * Every tool handler runs inside `withErrorHandling` and answers through `ok`.
 * Browser work goes through `browser.withPage(PLATFORM, …, timeoutMs)` with a
 * per-operation timeout read from `config.operationTimeouts` (falling back to
 * the `default` entry, then to 60 seconds).
 */
export const xiaoheihePlugin: PlatformPlugin = {
  name: PLATFORM,
  apiNamespace: 'xhh',

  /** Mounts the Xiaoheihe REST routes at the root of the supplied router. */
  registerRoutes(router: Router, browser: BrowserManager): void {
    router.use('/', createXhhRoutes(browser));
  },

  /** Registers all Xiaoheihe MCP tools on the given server. */
  registerTools(server: McpServer, browser: BrowserManager): void {
    // ---- session ----------------------------------------------------------

    server.tool(
      'xhh_check_login',
      'Check Xiaoheihe login status',
      CheckLoginSchema,
      async () => withErrorHandling('xhh_check_login', async () => {
        const timeoutMs = config.operationTimeouts['login'] ?? config.operationTimeouts['default'] ?? 60_000;
        const { loggedIn, username, avatar, userId } = await browser.withPage(
          PLATFORM,
          async (page) => checkLoginStatus(page),
          timeoutMs,
        );

        // Optional fields are included only when they carry a value.
        const payload: Record<string, unknown> = { logged_in: loggedIn };
        if (username) payload['username'] = username;
        if (avatar) payload['avatar'] = avatar;
        if (userId) payload['user_id'] = userId;
        return ok(payload);
      }),
    );

    server.tool(
      'xhh_get_login_qrcode',
      'Get Xiaoheihe login QR code',
      GetLoginQRCodeSchema,
      async () => withErrorHandling('xhh_get_login_qrcode', async () => {
        const { qrcodeData, alreadyLoggedIn, timeout } = await getLoginQRCode(browser);
        return ok({
          qrcode_data: qrcodeData,
          already_logged_in: alreadyLoggedIn,
          timeout,
        });
      }),
    );

    server.tool(
      'xhh_delete_cookies',
      'Delete Xiaoheihe cookies and reset login session',
      DeleteCookiesSchema,
      async () => withErrorHandling('xhh_delete_cookies', async () => {
        await deleteCookies(browser);
        return ok({ deleted: true });
      }),
    );

    // ---- read tools -------------------------------------------------------

    server.tool(
      'xhh_list_feeds',
      'List Xiaoheihe feed cards',
      ListFeedsSchema,
      async (args) => withErrorHandling('xhh_list_feeds', async () => {
        const timeoutMs = config.operationTimeouts['feed_list'] ?? config.operationTimeouts['default'] ?? 60_000;
        const allFeeds = await browser.withPage(PLATFORM, async (page) => listFeeds(page), timeoutMs);

        const pageSize = clampPageSize(args.max_count);
        const decodedCursor = decodeKeysetCursor(args.cursor);
        const { items, nextCursor } = paginateByKeyset(allFeeds, pageSize, decodedCursor, (item) => item.id);
        return ok(items, paginationMeta(args.cursor, pageSize, items.length, nextCursor));
      }),
    );

    server.tool(
      'xhh_search',
      'Search Xiaoheihe posts by keyword',
      SearchSchema,
      async (args) => withErrorHandling('xhh_search', async () => {
        const timeoutMs = config.operationTimeouts['search'] ?? config.operationTimeouts['default'] ?? 60_000;
        const matches = await browser.withPage(
          PLATFORM,
          async (page) => searchFeeds(page, args.keyword),
          timeoutMs,
        );

        const pageSize = clampPageSize(args.max_count);
        const decodedCursor = decodeKeysetCursor(args.cursor);
        const { items, nextCursor } = paginateByKeyset(matches, pageSize, decodedCursor, (item) => item.id);
        return ok(items, paginationMeta(args.cursor, pageSize, items.length, nextCursor));
      }),
    );

    server.tool(
      'xhh_get_feed_detail',
      'Get Xiaoheihe feed detail with first-screen comments',
      GetFeedDetailSchema,
      async (args) => withErrorHandling('xhh_get_feed_detail', async () => {
        const timeoutMs = config.operationTimeouts['feed_detail'] ?? config.operationTimeouts['default'] ?? 60_000;
        const { linkId } = resolveFeedTarget({
          link_id: args.link_id,
          url: args.url,
        });
        const fetched = await browser.withPage(
          PLATFORM,
          async (page) => getFeedDetail(page, linkId),
          timeoutMs,
        );

        // Comments are reported separately from the rest of the post detail.
        const { comments, ...postDetail } = fetched;
        return ok({
          detail: postDetail,
          first_screen_comments: comments,
        });
      }),
    );

    server.tool(
      'xhh_get_sub_comments',
      'Get sub-comments for a Xiaoheihe comment with keyset pagination',
      GetSubCommentsSchema,
      async (args) => withErrorHandling('xhh_get_sub_comments', async () => {
        const timeoutMs = config.operationTimeouts['feed_detail'] ?? config.operationTimeouts['default'] ?? 60_000;
        const replies = await browser.withPage(
          PLATFORM,
          async (page) => getSubComments(page, args.link_id, args.comment_id, MAX_PAGE_SIZE),
          timeoutMs,
        );

        // Sort a copy so the pagination key (createTime|id) follows a stable order.
        const ordered = [...replies].sort(compareCommentKey);
        const pageSize = clampPageSize(args.max_count);
        const decodedCursor = decodeKeysetCursor(args.cursor);
        const { items, nextCursor } = paginateByKeyset(
          ordered,
          pageSize,
          decodedCursor,
          (item) => `${item.createTime}|${item.id}`,
        );
        return ok(items, paginationMeta(args.cursor, pageSize, items.length, nextCursor));
      }),
    );

    server.tool(
      'xhh_get_user_profile',
      'Get Xiaoheihe user profile',
      GetUserProfileSchema,
      async (args) => withErrorHandling('xhh_get_user_profile', async () => {
        const timeoutMs = config.operationTimeouts['user_profile'] ?? config.operationTimeouts['default'] ?? 60_000;
        const { userId } = resolveUserTarget({
          user_id: args.user_id,
          url: args.url,
        });
        const fetched = await browser.withPage(
          PLATFORM,
          async (page) => getUserProfile(page, userId),
          timeoutMs,
        );

        const { id, nickname, avatar, description, follows, fans, likes, posts } = fetched;
        return ok({
          profile: { id, nickname, avatar, description, follows, fans, likes },
          recent_posts: posts,
        });
      }),
    );

    server.tool(
      'xhh_list_my_posts',
      'List my Xiaoheihe posts',
      ListMyPostsSchema,
      async (args) => withErrorHandling('xhh_list_my_posts', async () => {
        const timeoutMs = config.operationTimeouts['feed_list'] ?? config.operationTimeouts['default'] ?? 60_000;
        const myPosts = await browser.withPage(
          PLATFORM,
          async (page) => listMyPosts(page, args.type),
          timeoutMs,
        );

        const pageSize = clampPageSize(args.max_count);
        const decodedCursor = decodeKeysetCursor(args.cursor);
        const { items, nextCursor } = paginateByKeyset(
          myPosts,
          pageSize,
          decodedCursor,
          (item) => `${item.modifyTime ?? item.createTime ?? ''}|${item.id}`,
        );
        return ok(items, paginationMeta(args.cursor, pageSize, items.length, nextCursor));
      }),
    );

    // ---- write tools ------------------------------------------------------

    server.tool(
      'xhh_post_comment',
      'Post a comment on Xiaoheihe',
      PostCommentSchema,
      async (args) => withErrorHandling('xhh_post_comment', async () => {
        const timeoutMs = config.operationTimeouts['comment'] ?? config.operationTimeouts['default'] ?? 60_000;
        const hashedInput = {
          link_id: args.link_id,
          content: args.content,
        };
        const submit = async () => browser.withPage(
          PLATFORM,
          async (page) => postComment(page, args.link_id, args.content),
          timeoutMs,
        );

        const { data, meta } = await runWithIdempotency('xhh_post_comment', args.request_id, hashedInput, submit);
        return ok(data, meta);
      }),
    );

    server.tool(
      'xhh_reply_comment',
      'Reply a comment on Xiaoheihe',
      ReplyCommentSchema,
      async (args) => withErrorHandling('xhh_reply_comment', async () => {
        const timeoutMs = config.operationTimeouts['reply'] ?? config.operationTimeouts['default'] ?? 60_000;
        const hashedInput = {
          link_id: args.link_id,
          comment_id: args.comment_id,
          content: args.content,
        };
        const submit = async () => browser.withPage(
          PLATFORM,
          async (page) => replyComment(page, args.link_id, args.comment_id, args.content),
          timeoutMs,
        );

        const { data, meta } = await runWithIdempotency('xhh_reply_comment', args.request_id, hashedInput, submit);
        return ok(data, meta);
      }),
    );

    server.tool(
      'xhh_set_like_state',
      'Set like state for a Xiaoheihe post',
      SetLikeStateSchema,
      async (args) => withErrorHandling('xhh_set_like_state', async () => {
        const timeoutMs = config.operationTimeouts['like'] ?? config.operationTimeouts['default'] ?? 60_000;
        const outcome = await browser.withPage(
          PLATFORM,
          async (page) => setLikeState(page, args.link_id, args.liked),
          timeoutMs,
        );
        return ok(outcome);
      }),
    );

    server.tool(
      'xhh_set_favorite_state',
      'Set favorite state for a Xiaoheihe post',
      SetFavoriteStateSchema,
      async (args) => withErrorHandling('xhh_set_favorite_state', async () => {
        const timeoutMs = config.operationTimeouts['favorite'] ?? config.operationTimeouts['default'] ?? 60_000;
        const outcome = await browser.withPage(
          PLATFORM,
          async (page) => setFavoriteState(page, args.link_id, args.favorited),
          timeoutMs,
        );
        return ok(outcome);
      }),
    );
  },
};
|
||||
|
||||
@@ -0,0 +1,98 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { XHH_SELECTORS } from './selectors.js';
|
||||
import { detectCaptchaText } from './extractors.js';
|
||||
|
||||
/**
 * Builds the post detail page URL for a link id.
 *
 * @param linkId Post id; it is URI-component-encoded before being appended.
 */
function buildDetailUrl(linkId: string): string {
  const encodedId = encodeURIComponent(linkId);
  return 'https://www.xiaoheihe.cn/app/bbs/link/' + encodedId;
}
|
||||
|
||||
/**
 * Shared implementation for the like/favorite toggles: opens the post detail
 * page, reads the button state and clicks the button only when the state
 * differs from the requested one.
 *
 * @param page Page used for navigation and interaction.
 * @param linkId Id of the post whose detail page is opened.
 * @param targetState Desired state after the call.
 * @param buttonSelectors Candidate selectors for the toggle button.
 * @returns `state` is the state read last; `changed` tells whether it differs
 *   from the state read before clicking; `success` is true when the final
 *   state equals `targetState`.
 * @throws Error prefixed with `CAPTCHA_REQUIRED` when the page body text is
 *   flagged by `detectCaptchaText`.
 */
async function applyToggleState(
  page: Page,
  linkId: string,
  targetState: boolean,
  buttonSelectors: readonly string[],
): Promise<{ success: boolean; state: boolean; changed: boolean }> {
  await page.goto(buildDetailUrl(linkId), { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(1_000);

  const text = await page.textContent('body').catch(() => '');
  if (text && detectCaptchaText(text)) {
    throw new Error('CAPTCHA_REQUIRED: captcha detected on interaction page');
  }

  const current = await readButtonState(page, buttonSelectors);
  if (current === targetState) {
    // Already in the requested state: do not click, which would toggle it away.
    return { success: true, state: current, changed: false };
  }

  const clicked = await clickAny(page, buttonSelectors);
  if (!clicked) {
    return { success: false, state: current, changed: false };
  }

  // Fixed pause before re-reading the button state after the click.
  await page.waitForTimeout(700);
  const state = await readButtonState(page, buttonSelectors);
  return {
    success: state === targetState,
    state,
    changed: state !== current,
  };
}

/**
 * Sets the like state of a post to `targetState`.
 *
 * @returns See {@link applyToggleState}.
 * @throws Error prefixed with `CAPTCHA_REQUIRED` when a captcha is detected.
 */
export async function setLikeState(
  page: Page,
  linkId: string,
  targetState: boolean,
): Promise<{ success: boolean; state: boolean; changed: boolean }> {
  return applyToggleState(page, linkId, targetState, XHH_SELECTORS.detail.likeButton);
}

/**
 * Sets the favorite state of a post to `targetState`.
 *
 * @returns See {@link applyToggleState}.
 * @throws Error prefixed with `CAPTCHA_REQUIRED` when a captcha is detected.
 */
export async function setFavoriteState(
  page: Page,
  linkId: string,
  targetState: boolean,
): Promise<{ success: boolean; state: boolean; changed: boolean }> {
  return applyToggleState(page, linkId, targetState, XHH_SELECTORS.detail.favoriteButton);
}
|
||||
|
||||
/**
 * Clicks the first element that can be clicked among the candidate selectors.
 *
 * Selectors are tried in order, each with a 2 second click timeout; a failed
 * click moves on to the next candidate.
 *
 * @returns `true` as soon as one click succeeds, `false` when every candidate fails.
 */
async function clickAny(page: Page, selectors: readonly string[]): Promise<boolean> {
  for (const candidate of selectors) {
    const target = page.locator(candidate).first();
    try {
      await target.click({ timeout: 2_000 });
      return true;
    } catch {
      // This candidate could not be clicked; fall through to the next one.
    }
  }
  return false;
}
|
||||
|
||||
/**
 * Reads whether a toggle button (like / favorite) currently looks active.
 *
 * Selectors are probed in order; the first one that matches an element decides
 * the result. An element counts as active when `aria-pressed="true"`, when its
 * `class` attribute contains `active` or `selected`, or when its inner HTML
 * contains `filled` or `checked` (all compared case-insensitively, except
 * `aria-pressed`).
 *
 * @returns The detected state, or `false` when no selector matches or every
 *   probe fails.
 */
async function readButtonState(page: Page, selectors: readonly string[]): Promise<boolean> {
  for (const selector of selectors) {
    const state = await page
      .evaluate((sel) => {
        const node = document.querySelector(sel);
        if (!node) return null;
        if (node.getAttribute('aria-pressed') === 'true') return true;
        // Read the attribute instead of `className`: on SVG elements `className`
        // is an SVGAnimatedString whose toString() is "[object SVGAnimatedString]",
        // which would hide the real class list.
        const cls = (node.getAttribute('class') ?? '').toLowerCase();
        if (cls.includes('active') || cls.includes('selected')) return true;
        const html = node.innerHTML.toLowerCase();
        if (html.includes('filled') || html.includes('checked')) return true;
        return false;
      }, selector)
      .catch(() => null);
    // null means "no element / probe failed": try the next selector.
    if (typeof state === 'boolean') return state;
  }
  return false;
}
|
||||
|
||||
@@ -0,0 +1,169 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import type { BrowserManager } from '@social/core/browser/manager.js';
|
||||
import { cookieStore } from '@social/core/cookie/store.js';
|
||||
import { logger } from '@social/core/utils/logger.js';
|
||||
import { XHH_SELECTORS } from './selectors.js';
|
||||
import type { LoginStatus, QRCodeResult } from './types.js';
|
||||
import { extractUserIdFromUrl, firstNonEmpty } from './extractors.js';
|
||||
|
||||
/** Platform key passed to the browser manager and the cookie store. */
const PLATFORM = 'xiaoheihe';
|
||||
/** Page opened to check the login state and to start the QR login flow. */
const HOME_URL = 'https://www.xiaoheihe.cn/app/bbs/home';
|
||||
/** How long the background task waits for a logged-in indicator after the QR code is returned (4 minutes). */
const QR_SCAN_TIMEOUT_MS = 4 * 60_000;
|
||||
/** Delay of the safety timer that releases the acquired login page (5 minutes). */
const LOGIN_SAFETY_TIMEOUT_MS = 5 * 60_000;
|
||||
|
||||
/** Child logger tagged with `module: 'xhh-login'`. */
const log = logger.child({ module: 'xhh-login' });
|
||||
|
||||
/**
 * Opens the home page and reports whether a logged-in indicator is present.
 *
 * @param page Page to navigate; it is left on `HOME_URL`.
 * @returns `{ loggedIn: false }` when no indicator shows up within 4 seconds.
 *   Otherwise `loggedIn: true` plus `username`, `avatar` and `userId`, each
 *   included only when a non-empty value could be read.
 */
export async function checkLoginStatus(page: Page): Promise<LoginStatus> {
  await page.goto(HOME_URL, { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(1_200);

  const indicator = await waitFirstSelector(page, XHH_SELECTORS.login.loggedInIndicators, 4_000);
  if (!indicator) return { loggedIn: false };

  // The dedicated username element is read first; the indicator's own text is
  // the second candidate handed to firstNonEmpty.
  const labelText = await textFromSelector(page, XHH_SELECTORS.login.username);
  const indicatorText = await indicator.textContent().catch(() => '');
  const username = firstNonEmpty(labelText, indicatorText);

  const avatar = await attrFromSelector(page, XHH_SELECTORS.login.avatar, 'src');
  const profileHref = await attrFromSelector(page, XHH_SELECTORS.login.userLink, 'href');
  const userId = profileHref ? extractUserIdFromUrl(profileHref) : undefined;

  return {
    loggedIn: true,
    ...(username ? { username } : {}),
    ...(avatar ? { avatar } : {}),
    ...(userId ? { userId } : {}),
  };
}
|
||||
|
||||
/**
 * Starts the QR-code login flow and returns the QR image data.
 *
 * A page is acquired from the browser manager. When a QR code is found, the
 * page stays acquired and a background task (`waitForLoginAndRelease`) takes
 * over releasing it; a safety timer releases it after
 * `LOGIN_SAFETY_TIMEOUT_MS` otherwise. On every other path the page is
 * released before returning or throwing.
 *
 * The underlying `release` callback is invoked at most once no matter which
 * of these paths fire, and the safety timer is cancelled on release.
 *
 * @param browser Browser manager that owns the page.
 * @returns QR data with `alreadyLoggedIn: false`, or an empty QR payload with
 *   `alreadyLoggedIn: true` when the session is already logged in.
 * @throws Error when no QR code could be extracted, or when navigation fails.
 */
export async function getLoginQRCode(browser: BrowserManager): Promise<QRCodeResult> {
  const { page, release } = await browser.acquirePage(PLATFORM);

  let released = false;
  // Shared by the safety timer, the error path and the background waiter so
  // the page is handed back exactly once.
  const releaseOnce = async (): Promise<void> => {
    if (released) return;
    released = true;
    clearTimeout(releaseTimer);
    await release();
  };

  const releaseTimer = setTimeout(() => {
    releaseOnce().catch((err: unknown) => {
      log.warn({ err }, 'login safety release failed');
    });
  }, LOGIN_SAFETY_TIMEOUT_MS);
  // Do not let the safety timer keep the process alive.
  if (typeof releaseTimer === 'object' && 'unref' in releaseTimer) {
    releaseTimer.unref();
  }

  try {
    await page.goto(HOME_URL, { waitUntil: 'domcontentloaded' });
    await page.waitForTimeout(1_200);

    const status = await checkLoginStatus(page);
    if (status.loggedIn) {
      await releaseOnce();
      return {
        qrcodeData: '',
        alreadyLoggedIn: true,
        timeout: '0',
      };
    }

    const loginBtn = await page.$(XHH_SELECTORS.login.loginButton).catch(() => null);
    if (loginBtn) {
      // Best effort: the QR code may already be visible without clicking.
      await loginBtn.click().catch(() => {});
      await page.waitForTimeout(500);
    }

    const qrcodeData = await extractQrCodeData(page);
    if (!qrcodeData) {
      // The catch block below performs the release.
      throw new Error('waiting for selector: xhh login qrcode');
    }

    // Fire and forget: the caller gets the QR code immediately while the
    // background task waits for the scan and then releases the page.
    waitForLoginAndRelease(page, browser, releaseOnce).catch((err: unknown) => {
      log.warn({ err }, 'background login wait failed');
    });

    return {
      qrcodeData,
      alreadyLoggedIn: false,
      timeout: '4m',
    };
  } catch (err) {
    // A failing release must not mask the original error.
    await releaseOnce().catch((releaseErr: unknown) => {
      log.warn({ err: releaseErr }, 'failed to release login page');
    });
    throw err;
  }
}
|
||||
|
||||
/**
 * Resets the Xiaoheihe session: deletes the cookies stored for `PLATFORM`,
 * then clears the platform's browser context.
 *
 * @param browser Browser manager whose context for `PLATFORM` is cleared.
 */
export async function deleteCookies(browser: BrowserManager): Promise<void> {
|
||||
await cookieStore.delete(PLATFORM);
|
||||
await browser.clearContext(PLATFORM);
|
||||
}
|
||||
|
||||
/**
 * Background half of the QR login flow: waits up to `QR_SCAN_TIMEOUT_MS` for a
 * logged-in indicator, then saves cookies and clears the browser context.
 * `release` is always called afterwards, even when one of the steps throws.
 *
 * NOTE(review): `waitFirstSelector` returns `null` on timeout instead of
 * throwing, so cookies are saved and the context cleared even when nobody
 * scanned the code. Confirm whether that is intended as best-effort.
 *
 * @param page Page showing the QR code.
 * @param browser Browser manager used to save cookies and clear the context.
 * @param release Callback that hands the page back.
 */
async function waitForLoginAndRelease(
  page: Page,
  browser: BrowserManager,
  release: () => Promise<void>,
): Promise<void> {
  const persistSession = async (): Promise<void> => {
    await waitFirstSelector(page, XHH_SELECTORS.login.loggedInIndicators, QR_SCAN_TIMEOUT_MS);
    await browser.saveCookies(PLATFORM);
    await browser.clearContext(PLATFORM);
  };

  try {
    await persistSession();
  } finally {
    await release();
  }
}
|
||||
|
||||
/**
 * Waits until any of the given selectors matches and returns its element handle.
 *
 * All selectors are awaited concurrently and share the same time budget, so a
 * match on a later selector is reported as soon as it appears instead of only
 * after the earlier selectors have timed out.
 *
 * @param page Page to watch.
 * @param selectors Candidate selectors; an empty list yields `null` immediately.
 * @param timeout Overall budget in milliseconds (values below 1 are raised to 1).
 * @returns The handle of the first selector to match, or `null` when none
 *   matches within the budget. When several selectors already match, which
 *   one wins is not guaranteed.
 */
async function waitFirstSelector(
  page: Page,
  selectors: readonly string[],
  timeout: number,
) {
  type Handle = Awaited<ReturnType<Page['waitForSelector']>>;

  // Without this guard the promise below would never settle.
  if (selectors.length === 0) return null;

  const budget = Math.max(1, timeout);
  return new Promise<Handle>((resolve) => {
    let pending = selectors.length;
    const settle = (handle: Handle): void => {
      pending -= 1;
      if (handle) {
        // First match wins; later resolve() calls on a settled promise are no-ops.
        resolve(handle);
      } else if (pending === 0) {
        resolve(null);
      }
    };
    for (const selector of selectors) {
      // Timeouts and other waiter failures count as "no match" for that selector.
      page.waitForSelector(selector, { timeout: budget }).then(settle, () => settle(null));
    }
  });
}
|
||||
|
||||
/**
 * Reads the login QR code from the page.
 *
 * Each selector in `XHH_SELECTORS.login.qrCodeImage` is tried in order. An
 * `<img>` contributes its `src`, a `<canvas>` its `toDataURL()` output; any
 * other element, a missing element or an evaluation failure counts as empty.
 *
 * @returns The first non-empty value found, or `''` when there is none.
 */
async function extractQrCodeData(page: Page): Promise<string> {
  for (const qrSelector of XHH_SELECTORS.login.qrCodeImage) {
    const encoded = await page
      .evaluate((sel) => {
        const element = document.querySelector(sel);
        // A null element fails both instanceof checks and falls through to ''.
        if (element instanceof HTMLImageElement) return element.src || '';
        if (element instanceof HTMLCanvasElement) {
          try {
            return element.toDataURL();
          } catch {
            return '';
          }
        }
        return '';
      }, qrSelector)
      .catch(() => '');
    if (encoded) return encoded;
  }
  return '';
}
|
||||
|
||||
/**
 * Returns the trimmed text content of the first element matching `selector`.
 *
 * @returns The trimmed text, or `''` when the lookup fails (for example
 *   because nothing matches).
 */
async function textFromSelector(page: Page, selector: string): Promise<string> {
  try {
    return await page.$eval(selector, (el) => (el.textContent ?? '').trim());
  } catch {
    return '';
  }
}
|
||||
|
||||
/**
 * Returns an attribute value of the first element matching `selector`.
 *
 * @param attr Attribute name to read.
 * @returns The attribute value, or `''` when the attribute is absent or the
 *   lookup fails (for example because nothing matches).
 */
async function attrFromSelector(
  page: Page,
  selector: string,
  attr: string,
): Promise<string> {
  try {
    return await page.$eval(selector, (el, attrName) => el.getAttribute(attrName) ?? '', attr);
  } catch {
    return '';
  }
}
|
||||
|
||||
@@ -0,0 +1,88 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '@social/core/utils/logger.js';
|
||||
import { XHH_SELECTORS } from './selectors.js';
|
||||
import type { MyPost, MyPostType } from './types.js';
|
||||
import { detectCaptchaText, extractLinkIdFromUrl, parseCountString } from './extractors.js';
|
||||
|
||||
/**
 * Creator content-management page that `listMyPosts` navigates to.
 * NOTE(review): this module-level name shadows the global `URL` constructor
 * inside this file; consider renaming it if `new URL(...)` is ever needed here.
 */
const URL = 'https://www.xiaoheihe.cn/creator/content_management/home';
|
||||
/** Child logger tagged with `module: 'xhh-my-posts'`. */
const log = logger.child({ module: 'xhh-my-posts' });
|
||||
|
||||
/** Text that a tab button must contain to be picked for each post type filter. */
const TAB_KEYWORDS: Record<MyPostType, string> = {
|
||||
all: '全部',
|
||||
article: '文章',
|
||||
image_text: '图文',
|
||||
video: '视频',
|
||||
};
|
||||
|
||||
/**
 * Scrapes the creator content-management page for the current user's posts.
 *
 * @param page Page to navigate.
 * @param type Post type filter. For anything other than `'all'` the tab whose
 *   text contains the matching `TAB_KEYWORDS` entry is clicked first; a warning
 *   is logged when no such tab exists (the list is then scraped unfiltered).
 * @returns One entry per list item whose href yields a link id. `type` on each
 *   entry is the requested filter, `createTime` and `modifyTime` both carry the
 *   scraped time text, and the `user` fields are left empty.
 * @throws Error prefixed with `CAPTCHA_REQUIRED` when the page body text is
 *   flagged by `detectCaptchaText`.
 */
export async function listMyPosts(
  page: Page,
  type: MyPostType = 'all',
): Promise<MyPost[]> {
  await page.goto(URL, { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(1_200);

  const text = await page.textContent('body').catch(() => '');
  if (text && detectCaptchaText(text)) {
    throw new Error('CAPTCHA_REQUIRED: captcha detected on my-posts page');
  }

  if (type !== 'all') {
    const keyword = TAB_KEYWORDS[type];
    const tabs = page.locator(XHH_SELECTORS.myPosts.tabButton);
    const count = await tabs.count().catch(() => 0);
    let tabFound = false;
    for (let i = 0; i < count; i += 1) {
      const tab = tabs.nth(i);
      const tabText = (await tab.textContent().catch(() => '')) ?? '';
      if (tabText.includes(keyword)) {
        tabFound = true;
        await tab.click().catch(() => {});
        await page.waitForTimeout(500);
        break;
      }
    }
    if (!tabFound) {
      // Every returned post is still labelled with `type`, so make the
      // missing filter visible in the logs.
      log.warn({ type }, 'xhh my-posts tab not found');
    }
  }

  // Runs in the page: collect the raw strings for every list item.
  const rawItems = await page.evaluate((selectors) => {
    const container = [...document.querySelectorAll<HTMLElement>(selectors.myPosts.postItem)];
    return container.map((node) => {
      const linkNode = node.querySelector<HTMLAnchorElement>(selectors.myPosts.postLink);
      const href = linkNode?.getAttribute('href') ?? '';
      const title = (node.querySelector(selectors.myPosts.title)?.textContent ?? '').trim();
      const description = (node.querySelector(selectors.myPosts.description)?.textContent ?? '').trim();
      const time = (node.querySelector(selectors.myPosts.time)?.textContent ?? '').trim();
      const likeRaw = (node.querySelector('.like-count, .content-list__like-cnt')?.textContent ?? '').trim();
      const commentRaw = (node.querySelector('.comment-count, .content-list__comment-cnt')?.textContent ?? '').trim();
      const cover = (node.querySelector('img') as HTMLImageElement | null)?.src ?? '';
      return { href, title, description, time, likeRaw, commentRaw, cover };
    });
  }, XHH_SELECTORS);

  const posts: MyPost[] = [];
  for (const item of rawItems) {
    const linkId = extractLinkIdFromUrl(item.href);
    if (!linkId) continue;

    // `href` is the raw attribute value, so it may be absolute,
    // protocol-relative, root-relative or a bare relative path.
    let linkUrl: string;
    if (item.href.startsWith('http')) {
      linkUrl = item.href;
    } else if (item.href.startsWith('//')) {
      linkUrl = `https:${item.href}`;
    } else {
      const separator = item.href.startsWith('/') ? '' : '/';
      linkUrl = `https://www.xiaoheihe.cn${separator}${item.href}`;
    }

    posts.push({
      id: linkId,
      type,
      title: item.title,
      description: item.description,
      coverUrl: item.cover,
      likeCount: parseCountString(item.likeRaw),
      commentCount: parseCountString(item.commentRaw),
      user: {
        id: '',
        nickname: '',
        avatar: '',
      },
      linkUrl,
      createTime: item.time,
      modifyTime: item.time,
    });
  }

  log.info({ type, count: posts.length }, 'xhh my posts listed');
  return posts;
}
|
||||
@@ -0,0 +1,464 @@
|
||||
import { Router } from 'express';
|
||||
import { z, ZodError } from 'zod';
|
||||
|
||||
import type { BrowserManager } from '@social/core/browser/manager.js';
|
||||
import { config } from '@social/core/config/index.js';
|
||||
import { rateLimiter } from '@social/core/server/middleware.js';
|
||||
import { classifyError, sanitizeErrorMessage } from '@social/core/utils/errors.js';
|
||||
import { computeIdempotencyHash, getIdempotencyStore } from '@social/core/utils/idempotency.js';
|
||||
import { decodeKeysetCursor, paginateByKeyset } from './cursor.js';
|
||||
import { postComment, replyComment } from './comment.js';
|
||||
import { getFeedDetail, getSubComments } from './feed-detail.js';
|
||||
import { listFeeds } from './feeds.js';
|
||||
import { setFavoriteState, setLikeState } from './interaction.js';
|
||||
import { checkLoginStatus, deleteCookies, getLoginQRCode } from './login.js';
|
||||
import { listMyPosts } from './my-posts.js';
|
||||
import {
|
||||
GetFeedDetailSchema,
|
||||
GetSubCommentsSchema,
|
||||
GetUserProfileSchema,
|
||||
ListMyPostsSchema,
|
||||
PostCommentSchema,
|
||||
ReplyCommentSchema,
|
||||
SearchSchema,
|
||||
SetFavoriteStateSchema,
|
||||
SetLikeStateSchema,
|
||||
} from './schemas.js';
|
||||
import { searchFeeds } from './search.js';
|
||||
import { resolveFeedTarget, resolveUserTarget } from './target-resolver.js';
|
||||
import { getUserProfile } from './user-profile.js';
|
||||
|
||||
/** Platform key passed as the first argument to `browser.withPage`. */
const PLATFORM = 'xiaoheihe';
|
||||
/** Page size applied by `clampPageSize` when `max_count` is omitted. */
const DEFAULT_PAGE_SIZE = 20;
|
||||
/** Upper bound enforced by `clampPageSize`. */
const MAX_PAGE_SIZE = 200;
|
||||
|
||||
/** Rate limiter configured for 60 requests per 60-second window; attached to the login GET routes. */
const readRateLimiter = rateLimiter({ windowMs: 60_000, maxRequests: 60 });
|
||||
/** Rate limiter configured for 10 requests per 60-second window; presumably for mutating routes — verify against the route definitions. */
const writeRateLimiter = rateLimiter({ windowMs: 60_000, maxRequests: 10 });
|
||||
|
||||
// Zod object schemas assembled from the per-field validators exported by
// './schemas.js', so each object here validates its fields with the same
// rules as the schema it borrows from.

/** Object schema with the `keyword`, `max_count` and `cursor` validators of `SearchSchema`. */
const SearchBodySchema = z.object({
  keyword: SearchSchema.keyword,
  max_count: SearchSchema.max_count,
  cursor: SearchSchema.cursor,
});

/** Object schema with the `link_id` and `url` validators of `GetFeedDetailSchema`. */
const FeedDetailBodySchema = z.object({
  link_id: GetFeedDetailSchema.link_id,
  url: GetFeedDetailSchema.url,
});

/** Object schema with the `link_id`, `comment_id`, `max_count` and `cursor` validators of `GetSubCommentsSchema`. */
const SubCommentsBodySchema = z.object({
  link_id: GetSubCommentsSchema.link_id,
  comment_id: GetSubCommentsSchema.comment_id,
  max_count: GetSubCommentsSchema.max_count,
  cursor: GetSubCommentsSchema.cursor,
});

/** Object schema with the `user_id` and `url` validators of `GetUserProfileSchema`. */
const UserProfileBodySchema = z.object({
  user_id: GetUserProfileSchema.user_id,
  url: GetUserProfileSchema.url,
});

/** Object schema with the `request_id`, `link_id` and `content` validators of `PostCommentSchema`. */
const PostCommentBodySchema = z.object({
  request_id: PostCommentSchema.request_id,
  link_id: PostCommentSchema.link_id,
  content: PostCommentSchema.content,
});

/** Object schema with the `request_id`, `link_id`, `comment_id` and `content` validators of `ReplyCommentSchema`. */
const ReplyCommentBodySchema = z.object({
  request_id: ReplyCommentSchema.request_id,
  link_id: ReplyCommentSchema.link_id,
  comment_id: ReplyCommentSchema.comment_id,
  content: ReplyCommentSchema.content,
});

/** Object schema with the `link_id` and `liked` validators of `SetLikeStateSchema`. */
const LikeBodySchema = z.object({
  link_id: SetLikeStateSchema.link_id,
  liked: SetLikeStateSchema.liked,
});

/** Object schema with the `link_id` and `favorited` validators of `SetFavoriteStateSchema`. */
const FavoriteBodySchema = z.object({
  link_id: SetFavoriteStateSchema.link_id,
  favorited: SetFavoriteStateSchema.favorited,
});
|
||||
|
||||
/** JSON body shape produced by `successResponse`: `success` is always `true` and `data` carries the payload. */
interface ApiSuccessResponse<T> {
|
||||
success: true;
|
||||
data: T;
|
||||
}
|
||||
|
||||
/** JSON body shape produced by `errorResponse`: `success` is always `false` and `error` holds a code plus a message. */
interface ApiErrorResponse {
|
||||
success: false;
|
||||
error: {
|
||||
code: string;
|
||||
message: string;
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Wraps a payload in the success envelope.
 *
 * @returns `{ success: true, data }`.
 */
function successResponse<T>(data: T): ApiSuccessResponse<T> {
  const body: ApiSuccessResponse<T> = { success: true, data };
  return body;
}
|
||||
|
||||
/**
 * Builds the error envelope.
 *
 * @param code Error code string.
 * @param message Error message.
 * @returns `{ success: false, error: { code, message } }`.
 */
function errorResponse(code: string, message: string): ApiErrorResponse {
  const error = { code, message };
  return { success: false, error };
}
|
||||
|
||||
/**
 * Resolves the effective page size for a paginated request.
 *
 * @param maxCount Requested page size; `DEFAULT_PAGE_SIZE` is used when it is
 *   omitted or `NaN`.
 * @returns A whole number between 1 and `MAX_PAGE_SIZE` inclusive. Fractional
 *   requests are truncated toward zero before clamping.
 */
function clampPageSize(maxCount?: number): number {
  const requested = maxCount ?? DEFAULT_PAGE_SIZE;
  // NaN would otherwise propagate through Math.min/Math.max and reach the paginator.
  if (Number.isNaN(requested)) return DEFAULT_PAGE_SIZE;
  return Math.min(MAX_PAGE_SIZE, Math.max(1, Math.trunc(requested)));
}
|
||||
|
||||
/**
 * Executions currently running under an idempotency key, keyed by
 * `toolName` + NUL + `requestId`. A duplicate request that arrives while the
 * first one is still executing waits on this entry instead of running the
 * action a second time.
 *
 * NOTE(review): this only covers duplicates handled by the same process.
 */
const inFlightIdempotentRuns = new Map<
  string,
  { inputHash: ReturnType<typeof computeIdempotencyHash>; settled: Promise<unknown> }
>();

/**
 * Runs `execute` at most once per (`toolName`, `requestId`) pair.
 *
 * - Without a `requestId` the action simply runs and no `meta` is returned.
 * - If the store already holds a record for the key, the stored response is
 *   replayed (`idempotent_replay: true`, with `first_processed_at`).
 * - If another call with the same key is still executing, this call waits for
 *   it and then re-checks the store. If that first call failed, nothing was
 *   stored and this call executes the action itself.
 * - Otherwise the action runs, its result is stored, and it is returned with
 *   `idempotent_replay: false`.
 *
 * @param toolName Namespace for the idempotency key.
 * @param requestId Caller-supplied idempotency key; falsy disables the feature.
 * @param inputForHash Value hashed to detect key reuse with different parameters.
 * @param execute The action to run.
 * @returns The action result plus replay metadata when a `requestId` was given.
 * @throws Error when `requestId` was already used (or is in flight) with a different input hash.
 */
async function runWithIdempotency<T>(
  toolName: string,
  requestId: string | undefined,
  inputForHash: unknown,
  execute: () => Promise<T>,
): Promise<{ data: T; meta?: Record<string, unknown> }> {
  if (!requestId) {
    return { data: await execute() };
  }

  const store = getIdempotencyStore();
  const inputHash = computeIdempotencyHash(inputForHash);
  const inFlightKey = `${toolName}\u0000${requestId}`;

  // Loop until we either replay a stored result or become the sole executor.
  // There is no `await` between the final checks and the `set` below, so the
  // check-then-register step cannot interleave with another call.
  for (;;) {
    const existing = store.get(toolName, requestId);
    if (existing) {
      if (existing.inputHash !== inputHash) {
        throw new Error('request_id already used with different parameters');
      }
      return {
        data: existing.responseData as T,
        meta: {
          request_id: requestId,
          idempotent_replay: true,
          first_processed_at: existing.createdAt,
        },
      };
    }

    const pending = inFlightIdempotentRuns.get(inFlightKey);
    if (!pending) break;
    if (pending.inputHash !== inputHash) {
      throw new Error('request_id already used with different parameters');
    }
    // Wait for the running execution to settle (success or failure), then re-check.
    await pending.settled.then(
      () => undefined,
      () => undefined,
    );
  }

  // The tracked promise includes the store write, so waiters wake up only
  // after the result is replayable.
  const settled = (async () => {
    const data = await execute();
    store.put(toolName, requestId, inputHash, data);
    return data;
  })();
  inFlightIdempotentRuns.set(inFlightKey, { inputHash, settled });

  try {
    const data = await settled;
    return {
      data,
      meta: {
        request_id: requestId,
        idempotent_replay: false,
      },
    };
  } finally {
    inFlightIdempotentRuns.delete(inFlightKey);
  }
}
|
||||
|
||||
/**
 * Builds the Express router for the Xiaoheihe REST endpoints.
 *
 * Every handler validates its input with Zod, runs the platform operation
 * through `browser.withPage` with a per-operation timeout, and answers with
 * the `successResponse` envelope. Any thrown error goes to `handleError`.
 *
 * The boilerplate every route shares (async error forwarding, timeout lookup,
 * keyset pagination envelope) lives in the local helpers below.
 *
 * @param browser Browser manager used to run page operations.
 * @returns The configured router.
 */
export function createXhhRoutes(browser: BrowserManager): Router {
  const router = Router();

  /** Timeout for an operation: its own entry, else the `default` entry, else 60 s. */
  const timeoutFor = (operation: string): number =>
    config.operationTimeouts[operation] ?? config.operationTimeouts['default'] ?? 60_000;

  /** Runs an async handler body and forwards anything it throws to `handleError`. */
  const respond = (res: Parameters<typeof handleError>[0], task: () => Promise<void>): void => {
    void (async () => {
      try {
        await task();
      } catch (err) {
        handleError(res, err);
      }
    })();
  };

  /** Builds the `{ items, pagination }` payload shared by all keyset-paginated endpoints. */
  const keysetPage = <T>(
    paged: { items: readonly T[]; nextCursor?: string | null },
    cursor: string | undefined,
    limit: number,
  ) => ({
    items: paged.items,
    pagination: {
      mode: 'keyset',
      cursor: cursor ?? '',
      max_count: limit,
      returned: paged.items.length,
      ...(paged.nextCursor ? { next_cursor: paged.nextCursor } : {}),
    },
  });

  /** Query-string paging fields; `max_count` is coerced because query values arrive as strings. */
  const pagingQueryShape = {
    max_count: z.coerce.number().int().min(1).max(200).optional().default(20),
    cursor: z.string().optional(),
  };

  router.get('/login/status', readRateLimiter, (_req, res) => {
    respond(res, async () => {
      const status = await browser.withPage(
        PLATFORM,
        async (page) => checkLoginStatus(page),
        timeoutFor('login'),
      );
      res.json(successResponse({
        logged_in: status.loggedIn,
        ...(status.username ? { username: status.username } : {}),
        ...(status.avatar ? { avatar: status.avatar } : {}),
        ...(status.userId ? { user_id: status.userId } : {}),
      }));
    });
  });

  router.get('/login/qrcode', readRateLimiter, (_req, res) => {
    respond(res, async () => {
      const qr = await getLoginQRCode(browser);
      res.json(successResponse({
        qrcode_data: qr.qrcodeData,
        already_logged_in: qr.alreadyLoggedIn,
        timeout: qr.timeout,
      }));
    });
  });

  router.delete('/login/cookies', writeRateLimiter, (_req, res) => {
    respond(res, async () => {
      await deleteCookies(browser);
      res.json(successResponse({ deleted: true }));
    });
  });

  router.get('/feeds', readRateLimiter, (req, res) => {
    respond(res, async () => {
      const query = z.object(pagingQueryShape).parse(req.query);
      // Decode first so a malformed cursor fails before any browser work starts.
      const after = decodeKeysetCursor(query.cursor);
      const feeds = await browser.withPage(PLATFORM, async (page) => listFeeds(page), timeoutFor('feed_list'));
      const limit = clampPageSize(query.max_count);
      const paged = paginateByKeyset(feeds, limit, after, (item) => item.id);
      res.json(successResponse(keysetPage(paged, query.cursor, limit)));
    });
  });

  router.post('/search', readRateLimiter, (req, res) => {
    respond(res, async () => {
      const body = SearchBodySchema.parse(req.body);
      const after = decodeKeysetCursor(body.cursor);
      const items = await browser.withPage(
        PLATFORM,
        async (page) => searchFeeds(page, body.keyword),
        timeoutFor('search'),
      );
      const limit = clampPageSize(body.max_count);
      const paged = paginateByKeyset(items, limit, after, (item) => item.id);
      res.json(successResponse(keysetPage(paged, body.cursor, limit)));
    });
  });

  router.post('/feeds/detail', readRateLimiter, (req, res) => {
    respond(res, async () => {
      const body = FeedDetailBodySchema.parse(req.body);
      const target = resolveFeedTarget(body);
      const detail = await browser.withPage(
        PLATFORM,
        async (page) => getFeedDetail(page, target.linkId),
        timeoutFor('feed_detail'),
      );
      // Comments are returned next to the detail object, not nested inside it.
      const { comments, ...rest } = detail;
      res.json(successResponse({
        detail: rest,
        first_screen_comments: comments,
      }));
    });
  });

  router.post('/feeds/sub-comments', readRateLimiter, (req, res) => {
    respond(res, async () => {
      const body = SubCommentsBodySchema.parse(req.body);
      const after = decodeKeysetCursor(body.cursor);
      const loaded = await browser.withPage(
        PLATFORM,
        async (page) => getSubComments(page, body.link_id, body.comment_id, MAX_PAGE_SIZE),
        timeoutFor('feed_detail'),
      );
      // Sort by (createTime, id) so it matches the composite pagination key below.
      const sorted = [...loaded].sort((a, b) => {
        const timeCmp = a.createTime.localeCompare(b.createTime);
        if (timeCmp !== 0) return timeCmp;
        return a.id.localeCompare(b.id);
      });
      const limit = clampPageSize(body.max_count);
      const paged = paginateByKeyset(sorted, limit, after, (item) => `${item.createTime}|${item.id}`);
      res.json(successResponse(keysetPage(paged, body.cursor, limit)));
    });
  });

  router.post('/user/profile', readRateLimiter, (req, res) => {
    respond(res, async () => {
      const body = UserProfileBodySchema.parse(req.body);
      const target = resolveUserTarget(body);
      const profile = await browser.withPage(
        PLATFORM,
        async (page) => getUserProfile(page, target.userId),
        timeoutFor('user_profile'),
      );
      res.json(successResponse({
        profile: {
          id: profile.id,
          nickname: profile.nickname,
          avatar: profile.avatar,
          description: profile.description,
          follows: profile.follows,
          fans: profile.fans,
          likes: profile.likes,
        },
        recent_posts: profile.posts,
      }));
    });
  });

  router.get('/my-posts', readRateLimiter, (req, res) => {
    respond(res, async () => {
      const query = z.object({ type: ListMyPostsSchema.type, ...pagingQueryShape }).parse(req.query);
      const after = decodeKeysetCursor(query.cursor);
      const posts = await browser.withPage(
        PLATFORM,
        async (page) => listMyPosts(page, query.type),
        timeoutFor('feed_list'),
      );
      const limit = clampPageSize(query.max_count);
      const paged = paginateByKeyset(
        posts,
        limit,
        after,
        (item) => `${item.modifyTime ?? item.createTime ?? ''}|${item.id}`,
      );
      res.json(successResponse(keysetPage(paged, query.cursor, limit)));
    });
  });

  router.post('/comment', writeRateLimiter, (req, res) => {
    respond(res, async () => {
      const body = PostCommentBodySchema.parse(req.body);
      const timeoutMs = timeoutFor('comment');
      const result = await runWithIdempotency(
        'xhh_post_comment',
        body.request_id,
        { link_id: body.link_id, content: body.content },
        async () => browser.withPage(
          PLATFORM,
          async (page) => postComment(page, body.link_id, body.content),
          timeoutMs,
        ),
      );
      res.json(successResponse({
        ...result.data,
        ...(result.meta ? { meta: result.meta } : {}),
      }));
    });
  });

  router.post('/comment/reply', writeRateLimiter, (req, res) => {
    respond(res, async () => {
      const body = ReplyCommentBodySchema.parse(req.body);
      const timeoutMs = timeoutFor('reply');
      const result = await runWithIdempotency(
        'xhh_reply_comment',
        body.request_id,
        { link_id: body.link_id, comment_id: body.comment_id, content: body.content },
        async () => browser.withPage(
          PLATFORM,
          async (page) => replyComment(page, body.link_id, body.comment_id, body.content),
          timeoutMs,
        ),
      );
      res.json(successResponse({
        ...result.data,
        ...(result.meta ? { meta: result.meta } : {}),
      }));
    });
  });

  router.post('/like/set-state', writeRateLimiter, (req, res) => {
    respond(res, async () => {
      const body = LikeBodySchema.parse(req.body);
      const result = await browser.withPage(
        PLATFORM,
        async (page) => setLikeState(page, body.link_id, body.liked),
        timeoutFor('like'),
      );
      res.json(successResponse(result));
    });
  });

  router.post('/favorite/set-state', writeRateLimiter, (req, res) => {
    respond(res, async () => {
      const body = FavoriteBodySchema.parse(req.body);
      const result = await browser.withPage(
        PLATFORM,
        async (page) => setFavoriteState(page, body.link_id, body.favorited),
        timeoutFor('favorite'),
      );
      res.json(successResponse(result));
    });
  });

  return router;
}
|
||||
|
||||
/**
 * Converts a thrown value into the JSON error envelope and sends it.
 *
 * - `ZodError` → HTTP 400 with code `VALIDATION_ERROR`; the message lists each
 *   issue as `path: message` (`<root>` for an empty path), joined by `; `.
 * - Anything else is normalised to an `Error`, categorised with
 *   `classifyError`, and sent with its sanitised message: HTTP 401 when the
 *   category is `AUTH_REQUIRED`, HTTP 500 otherwise.
 *
 * @param res Response-like object exposing `status(code).json(body)`.
 * @param err The caught value.
 */
function handleError(res: { status: (n: number) => { json: (body: ApiErrorResponse) => void } }, err: unknown): void {
  if (!(err instanceof ZodError)) {
    const failure = err instanceof Error ? err : new Error(String(err));
    const category = classifyError(failure);
    const safeMessage = sanitizeErrorMessage(failure.message);
    const httpStatus = category === 'AUTH_REQUIRED' ? 401 : 500;
    res.status(httpStatus).json(errorResponse(category, safeMessage));
    return;
  }

  const parts: string[] = [];
  for (const issue of err.issues) {
    parts.push(`${issue.path.join('.') || '<root>'}: ${issue.message}`);
  }
  res.status(400).json(errorResponse('VALIDATION_ERROR', parts.join('; ')));
}
|
||||
@@ -0,0 +1,82 @@
|
||||
import { z } from 'zod';
|
||||
|
||||
// Empty input shapes: these three operations declare no input fields.
export const CheckLoginSchema = {};
|
||||
export const GetLoginQRCodeSchema = {};
|
||||
export const DeleteCookiesSchema = {};
|
||||
|
||||
/**
 * Paging fields for feed listing.
 * `max_count` is an integer in 1..200 that defaults to 20; `cursor` is an
 * optional string. Other schemas in this file reuse both validators.
 */
export const ListFeedsSchema = {
|
||||
max_count: z
|
||||
.number()
|
||||
.int()
|
||||
.min(1)
|
||||
.max(200)
|
||||
.optional()
|
||||
.default(20)
|
||||
.describe('Maximum number of feeds to return per page (1-200, default 20)'),
|
||||
cursor: z
|
||||
.string()
|
||||
.optional()
|
||||
.describe('Keyset pagination cursor returned by previous call'),
|
||||
};
|
||||
|
||||
/** Search input: a non-empty `keyword` plus the paging fields shared with `ListFeedsSchema`. */
export const SearchSchema = {
|
||||
keyword: z.string().min(1).describe('Search keyword'),
|
||||
max_count: ListFeedsSchema.max_count,
|
||||
cursor: ListFeedsSchema.cursor,
|
||||
};
|
||||
|
||||
/**
 * Feed-detail input: both fields are optional at the schema level.
 * The "one of them is required" rule in the descriptions is not enforced by
 * these validators.
 */
export const GetFeedDetailSchema = {
|
||||
link_id: z.string().optional().describe('Link ID (required when url is absent)'),
|
||||
url: z.string().optional().describe('Detail page URL (auto-parse link_id)'),
|
||||
};
|
||||
|
||||
/**
 * Sub-comment listing input.
 * `link_id` and `comment_id` must be non-empty so an empty identifier is
 * rejected at validation time; paging fields are shared with `ListFeedsSchema`.
 */
export const GetSubCommentsSchema = {
  link_id: z.string().min(1).describe('Link ID'),
  comment_id: z.string().min(1).describe('Parent comment ID'),
  max_count: ListFeedsSchema.max_count,
  cursor: ListFeedsSchema.cursor,
};
|
||||
|
||||
/**
 * User-profile input: both fields are optional at the schema level.
 * The "one of them is required" rule in the descriptions is not enforced by
 * these validators.
 */
export const GetUserProfileSchema = {
|
||||
user_id: z.string().optional().describe('User ID (required when url is absent)'),
|
||||
url: z.string().optional().describe('User profile URL (auto-parse user_id)'),
|
||||
};
|
||||
|
||||
/**
 * "My posts" listing input: an optional post-type filter that defaults to
 * 'all', plus the paging fields shared with `ListFeedsSchema`.
 */
export const ListMyPostsSchema = {
|
||||
type: z
|
||||
.enum(['all', 'article', 'image_text', 'video'])
|
||||
.optional()
|
||||
.default('all')
|
||||
.describe('Post type filter'),
|
||||
max_count: ListFeedsSchema.max_count,
|
||||
cursor: ListFeedsSchema.cursor,
|
||||
};
|
||||
|
||||
/**
 * Post-comment input.
 * `request_id` is an optional idempotency key of 1..128 characters.
 * `link_id` must be non-empty so a write is never attempted against an empty
 * identifier; `content` must be non-empty.
 */
export const PostCommentSchema = {
  request_id: z
    .string()
    .min(1)
    .max(128)
    .optional()
    .describe('Optional idempotency key'),
  link_id: z.string().min(1).describe('Link ID'),
  content: z.string().min(1).describe('Comment content'),
};
|
||||
|
||||
/**
 * Reply-to-comment input.
 * Shares the `request_id` validator with `PostCommentSchema`. `link_id` and
 * `comment_id` must be non-empty so a write is never attempted against an
 * empty identifier; `content` must be non-empty.
 */
export const ReplyCommentSchema = {
  request_id: PostCommentSchema.request_id,
  link_id: z.string().min(1).describe('Link ID'),
  comment_id: z.string().min(1).describe('Target comment ID'),
  content: z.string().min(1).describe('Reply content'),
};
|
||||
|
||||
/**
 * Set-like-state input: a non-empty `link_id` and the desired `liked` flag.
 * The empty string is rejected so the action is never attempted against an
 * empty identifier.
 */
export const SetLikeStateSchema = {
  link_id: z.string().min(1).describe('Link ID'),
  liked: z.boolean().describe('Target like state'),
};
|
||||
|
||||
/**
 * Set-favorite-state input: a non-empty `link_id` and the desired `favorited`
 * flag. The empty string is rejected so the action is never attempted against
 * an empty identifier.
 */
export const SetFavoriteStateSchema = {
  link_id: z.string().min(1).describe('Link ID'),
  favorited: z.boolean().describe('Target favorite state'),
};
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import type { Feed } from './types.js';
|
||||
import { searchFeeds as runSearch } from './feeds.js';
|
||||
|
||||
/**
 * Searches feeds by keyword by delegating to the `searchFeeds` implementation
 * in `./feeds.js`.
 *
 * @param page Page used to run the search.
 * @param keyword Search keyword.
 * @returns The feeds produced by the delegated search.
 */
export async function searchFeeds(page: Page, keyword: string): Promise<Feed[]> {
  const results = await runSearch(page, keyword);
  return results;
}
|
||||
|
||||
@@ -0,0 +1,147 @@
|
||||
export const XHH_SELECTORS = {
|
||||
login: {
|
||||
loggedInIndicators: [
|
||||
'.user-profile-user-head',
|
||||
'.user-info .user-name',
|
||||
'.view-header__user-box',
|
||||
],
|
||||
loginButton: '.user-box__login, .login-btn, button:has-text("登录")',
|
||||
qrCodeImage: [
|
||||
'#login-qrcode img',
|
||||
'#login-qrcode canvas',
|
||||
'.qr-code-wrapper img',
|
||||
'.qrcode-box img',
|
||||
'img[src*="qrcode"]',
|
||||
],
|
||||
username: '.user-profile-user-head .name, .user-info .user-name',
|
||||
avatar: '.user-profile-user-head img, .user-info img.user-image',
|
||||
userLink: 'a[href*="/app/user/profile/"]',
|
||||
},
|
||||
|
||||
feed: {
|
||||
card: [
|
||||
'.content-management-home__content',
|
||||
'.hb-cpt__moment-list-content',
|
||||
'.related-recommend__link-item--content',
|
||||
'.bbs-home__content-list > *',
|
||||
],
|
||||
link: 'a[href*="/app/bbs/link/"]',
|
||||
title: [
|
||||
'.link-item__title',
|
||||
'.content-list__title',
|
||||
'.article-title .title',
|
||||
'.title',
|
||||
],
|
||||
description: [
|
||||
'.link-item__desc',
|
||||
'.content-list__desc',
|
||||
'.article-desc',
|
||||
'.desc',
|
||||
],
|
||||
cover: 'img',
|
||||
userLink: 'a[href*="/app/user/profile/"]',
|
||||
userName: [
|
||||
'.list-content__username',
|
||||
'.user-name',
|
||||
'.name',
|
||||
],
|
||||
likeCount: [
|
||||
'.content-list__like-cnt',
|
||||
'.like-count',
|
||||
'.link-award-num',
|
||||
],
|
||||
commentCount: [
|
||||
'.content-list__comment-cnt',
|
||||
'.comment-count',
|
||||
'.comment-num',
|
||||
],
|
||||
},
|
||||
|
||||
detail: {
|
||||
title: [
|
||||
'.link-detail__title',
|
||||
'.bbs-link__title',
|
||||
'.article-title .title',
|
||||
'h1',
|
||||
],
|
||||
description: [
|
||||
'.link-detail__content',
|
||||
'.bbs-link__content',
|
||||
'.article-content',
|
||||
'.description',
|
||||
],
|
||||
image: '.article-content img, .bbs-link img, .link-detail img',
|
||||
userLink: 'a[href*="/app/user/profile/"]',
|
||||
userName: [
|
||||
'.user-profile-user-head .name',
|
||||
'.user-info .user-name',
|
||||
'.header .name',
|
||||
],
|
||||
userAvatar: '.user-profile-user-head img, .user-info img, .header img',
|
||||
commentItem: [
|
||||
'.comment-item',
|
||||
'.bbs-comment-item',
|
||||
'.link-comment-item',
|
||||
'.comment__item',
|
||||
'[id*="comment"]',
|
||||
],
|
||||
subCommentItem: [
|
||||
'.sub-comment-item',
|
||||
'.reply-item',
|
||||
'.sub-comment',
|
||||
'.child-comment',
|
||||
],
|
||||
commentAuthor: '.name, .nickname, a[href*="/app/user/profile/"]',
|
||||
commentAvatar: 'img',
|
||||
commentContent: '.content, .comment-content, p',
|
||||
commentTime: '.time, .date, .create-time',
|
||||
commentLikeCount: '.like-count, .like .count',
|
||||
commentReplyButton: 'button:has-text("回复"), .reply-btn, .comment-reply',
|
||||
commentExpandReplies: 'button:has-text("展开"), .show-more, .expand-replies',
|
||||
likeButton: [
|
||||
'.engage-bar-style .like-wrapper',
|
||||
'.like-wrapper',
|
||||
'button:has(.heybox-bbs_thumbs-up_line_24x24)',
|
||||
'button:has(.heybox-bbs_thumbs-up_filled_24x24)',
|
||||
],
|
||||
favoriteButton: [
|
||||
'.engage-bar-style .collect-wrapper',
|
||||
'.collect-wrapper',
|
||||
'button:has(.heybox-bbs_collect_line_24x24)',
|
||||
'button:has(.heybox-bbs_collect_filled_24x24)',
|
||||
],
|
||||
commentCount: '.content-list__comment-cnt, .comment-count, .comment-num',
|
||||
likeCount: '.content-list__like-cnt, .like-count, .link-award-num',
|
||||
favoriteCount: '.favorite-count, .collect-count, .favour-count',
|
||||
commentInput: [
|
||||
'textarea[placeholder*="评论"]',
|
||||
'textarea',
|
||||
'[contenteditable="true"][placeholder*="评论"]',
|
||||
'[contenteditable="true"]',
|
||||
],
|
||||
commentSubmit: [
|
||||
'button:has-text("发送")',
|
||||
'button:has-text("发布")',
|
||||
'button:has-text("评论")',
|
||||
'.comment-submit',
|
||||
'.submit',
|
||||
],
|
||||
},
|
||||
|
||||
profile: {
|
||||
nickname: '.user-profile-user-head .name, .user-info .user-name',
|
||||
avatar: '.user-profile-user-head img, .user-info img.user-image',
|
||||
description: '.user-profile-user-head .desc, .user-info .user-desc, .signature',
|
||||
followCount: '.bbs-info-item .value, .follow-num',
|
||||
postLink: 'a[href*="/app/bbs/link/"]',
|
||||
},
|
||||
|
||||
myPosts: {
|
||||
tabButton: '.creator-content-management__tabs button',
|
||||
postItem: '.content-management-home__content',
|
||||
postLink: 'a[href*="/app/bbs/link/"]',
|
||||
title: '.link-item__title, .title',
|
||||
description: '.link-item__desc, .desc',
|
||||
time: '.time, .date',
|
||||
},
|
||||
} as const;
|
||||
@@ -0,0 +1,52 @@
|
||||
import { extractLinkIdFromUrl, extractUserIdFromUrl } from './extractors.js';
|
||||
|
||||
interface FeedTargetInput {
|
||||
link_id?: string;
|
||||
url?: string;
|
||||
}
|
||||
|
||||
interface UserTargetInput {
|
||||
user_id?: string;
|
||||
url?: string;
|
||||
}
|
||||
|
||||
export interface FeedTargetResolved {
|
||||
linkId: string;
|
||||
}
|
||||
|
||||
export interface UserTargetResolved {
|
||||
userId: string;
|
||||
}
|
||||
|
||||
/**
 * Trims surrounding whitespace from a URL string.
 *
 * @param url Raw URL text.
 * @returns The trimmed URL.
 * @throws Error when nothing remains after trimming.
 */
function normalizeUrl(url: string): string {
  const cleaned = url.trim();
  if (cleaned.length === 0) {
    throw new Error('url cannot be empty');
  }
  return cleaned;
}
|
||||
|
||||
/**
 * Resolves which feed a request refers to.
 *
 * A non-blank `link_id` wins. Otherwise, when `url` is provided, the ID is
 * parsed from it with `extractLinkIdFromUrl`.
 *
 * @param input Object carrying an optional `link_id` and/or `url`.
 * @returns The resolved link ID.
 * @throws Error when neither source yields an ID, or when `url` is whitespace-only.
 */
export function resolveFeedTarget(input: FeedTargetInput): FeedTargetResolved {
  const explicitId = input.link_id?.trim();
  if (explicitId) {
    return { linkId: explicitId };
  }

  const idFromUrl = input.url ? extractLinkIdFromUrl(normalizeUrl(input.url)) : undefined;
  if (idFromUrl) {
    return { linkId: idFromUrl };
  }

  throw new Error('xhh_get_feed_detail requires link_id or url containing link_id');
}
|
||||
|
||||
/**
 * Resolves which user a request refers to.
 *
 * A non-blank `user_id` wins. Otherwise, when `url` is provided, the ID is
 * parsed from it with `extractUserIdFromUrl`.
 *
 * @param input Object carrying an optional `user_id` and/or `url`.
 * @returns The resolved user ID.
 * @throws Error when neither source yields an ID, or when `url` is whitespace-only.
 */
export function resolveUserTarget(input: UserTargetInput): UserTargetResolved {
  const explicitId = input.user_id?.trim();
  if (explicitId) {
    return { userId: explicitId };
  }

  const idFromUrl = input.url ? extractUserIdFromUrl(normalizeUrl(input.url)) : undefined;
  if (idFromUrl) {
    return { userId: idFromUrl };
  }

  throw new Error('xhh_get_user_profile requires user_id or url containing user_id');
}
|
||||
|
||||
@@ -0,0 +1,74 @@
|
||||
export interface LoginStatus {
|
||||
loggedIn: boolean;
|
||||
username?: string;
|
||||
avatar?: string;
|
||||
userId?: string;
|
||||
}
|
||||
|
||||
export interface QRCodeResult {
|
||||
qrcodeData: string;
|
||||
alreadyLoggedIn: boolean;
|
||||
timeout: string;
|
||||
}
|
||||
|
||||
export interface FeedUser {
|
||||
id: string;
|
||||
nickname: string;
|
||||
avatar: string;
|
||||
}
|
||||
|
||||
export interface Feed {
|
||||
id: string;
|
||||
title: string;
|
||||
description: string;
|
||||
coverUrl: string;
|
||||
likeCount: number;
|
||||
commentCount: number;
|
||||
user: FeedUser;
|
||||
linkUrl: string;
|
||||
}
|
||||
|
||||
export interface Comment {
|
||||
id: string;
|
||||
parentId?: string;
|
||||
userId: string;
|
||||
nickname: string;
|
||||
avatar: string;
|
||||
content: string;
|
||||
likeCount: number;
|
||||
createTime: string;
|
||||
subComments: Comment[];
|
||||
}
|
||||
|
||||
export interface FeedDetail {
|
||||
id: string;
|
||||
title: string;
|
||||
description: string;
|
||||
images: string[];
|
||||
likeCount: number;
|
||||
favoriteCount: number;
|
||||
commentCount: number;
|
||||
isLiked: boolean;
|
||||
isFavorited: boolean;
|
||||
user: FeedUser;
|
||||
comments: Comment[];
|
||||
}
|
||||
|
||||
export interface UserProfile {
|
||||
id: string;
|
||||
nickname: string;
|
||||
avatar: string;
|
||||
description: string;
|
||||
follows: number;
|
||||
fans: number;
|
||||
likes: number;
|
||||
posts: Feed[];
|
||||
}
|
||||
|
||||
export type MyPostType = 'all' | 'article' | 'image_text' | 'video';
|
||||
|
||||
export interface MyPost extends Feed {
|
||||
type: MyPostType;
|
||||
createTime?: string;
|
||||
modifyTime?: string;
|
||||
}
|
||||
@@ -0,0 +1,72 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '@social/core/utils/logger.js';
|
||||
import { XHH_SELECTORS } from './selectors.js';
|
||||
import type { UserProfile } from './types.js';
|
||||
import { detectCaptchaText, parseCountString } from './extractors.js';
|
||||
import { listFeeds } from './feeds.js';
|
||||
|
||||
const log = logger.child({ module: 'xhh-user-profile' });
|
||||
|
||||
/**
 * Builds the profile page URL for a user.
 *
 * @param userId User ID; it is URI-component-encoded before being appended.
 * @returns The absolute profile URL.
 */
function buildProfileUrl(userId: string): string {
  const encodedId = encodeURIComponent(userId);
  return 'https://www.xiaoheihe.cn/app/user/profile/' + encodedId;
}
|
||||
|
||||
/**
 * Loads a user's profile page and extracts their profile data.
 *
 * Steps: navigate to the profile URL, wait briefly, bail out if the page text
 * looks like a captcha, read nickname / avatar / description / counters / post
 * links from the DOM, then gather recent posts through `listFeeds` on a
 * best-effort basis.
 *
 * Recent posts are kept when their author ID equals `userId`, or when their ID
 * exactly matches a post link ID found on the profile page. At most 20 posts
 * are returned.
 *
 * @param page Page used for navigation and extraction.
 * @param userId ID of the user whose profile is requested.
 * @returns The extracted profile; `posts` is empty when the feed listing fails.
 * @throws Error with a `CAPTCHA_REQUIRED:` prefix when a captcha is detected.
 * @throws Error when neither a nickname nor an avatar could be read.
 */
export async function getUserProfile(page: Page, userId: string): Promise<UserProfile> {
  await page.goto(buildProfileUrl(userId), { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(1_200);

  const text = await page.textContent('body').catch(() => '');
  if (text && detectCaptchaText(text)) {
    throw new Error('CAPTCHA_REQUIRED: captcha detected on user profile page');
  }

  const raw = await page.evaluate((selectors) => {
    const pickText = (selector: string) =>
      (document.querySelector(selector)?.textContent ?? '').trim();
    const pickAttr = (selector: string, attr: string) =>
      (document.querySelector(selector)?.getAttribute(attr) ?? '').trim();

    const counters = [...document.querySelectorAll(selectors.profile.followCount)]
      .map((node) => (node.textContent ?? '').trim())
      .filter(Boolean);

    const postLinks = [...document.querySelectorAll<HTMLAnchorElement>(selectors.profile.postLink)]
      .map((node) => node.getAttribute('href') ?? '')
      .filter(Boolean);

    return {
      nickname: pickText(selectors.profile.nickname),
      avatar: pickAttr(selectors.profile.avatar, 'src'),
      description: pickText(selectors.profile.description),
      counters,
      postLinks,
    };
  }, XHH_SELECTORS);

  // Fail fast: no need to scrape feeds when the profile itself was not found.
  if (!raw.nickname && !raw.avatar) {
    throw new Error('waiting for selector: xhh profile not found');
  }

  // Counters are read positionally, in document order.
  const [followRaw, fansRaw, likesRaw] = raw.counters;

  // Compare exact link IDs, not substrings: `href.includes(id)` would let id
  // "123" match ".../link/91234", and an empty id would match every href.
  const profileLinkIds = new Set<string>();
  for (const href of raw.postLinks as string[]) {
    const linkId = /\/app\/bbs\/link\/([^/?#]+)/.exec(href)?.[1];
    if (linkId) profileLinkIds.add(linkId);
  }

  // Best effort: a failed feed listing must not fail the whole profile, but
  // log it so the empty `posts` list is explainable.
  const recentPosts = await listFeeds(page).catch((err: unknown) => {
    log.warn({ err, userId }, 'xhh user profile: listing recent posts failed, returning profile without posts');
    return [];
  });
  const filteredPosts = recentPosts
    .filter((item) => item.user.id === userId || (item.id !== '' && profileLinkIds.has(item.id)))
    .slice(0, 20);

  const profile: UserProfile = {
    id: userId,
    nickname: raw.nickname,
    avatar: raw.avatar,
    description: raw.description,
    follows: parseCountString(followRaw),
    fans: parseCountString(fansRaw),
    likes: parseCountString(likesRaw),
    posts: filteredPosts,
  };

  log.info({ userId, posts: profile.posts.length }, 'xhh user profile extracted');
  return profile;
}
|
||||
|
||||
@@ -0,0 +1,45 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
import {
|
||||
decodeKeysetCursor,
|
||||
encodeKeysetCursor,
|
||||
paginateByKeyset,
|
||||
} from '../src/platforms/xiaoheihe/cursor.js';
|
||||
|
||||
describe('xhh keyset cursor', () => {
|
||||
it('encodes and decodes cursor payload', () => {
|
||||
const encoded = encodeKeysetCursor({ key: 'abc-123' });
|
||||
const decoded = decodeKeysetCursor(encoded);
|
||||
expect(decoded).toEqual({ key: 'abc-123' });
|
||||
});
|
||||
|
||||
it('throws on invalid cursor payload', () => {
|
||||
expect(() => decodeKeysetCursor('not-base64')).toThrow();
|
||||
});
|
||||
|
||||
it('paginates deterministically without duplicates', () => {
|
||||
const items = [
|
||||
{ id: 'a' },
|
||||
{ id: 'b' },
|
||||
{ id: 'c' },
|
||||
{ id: 'd' },
|
||||
{ id: 'e' },
|
||||
];
|
||||
|
||||
const page1 = paginateByKeyset(items, 2, undefined, (item) => item.id);
|
||||
expect(page1.items.map((i) => i.id)).toEqual(['a', 'b']);
|
||||
expect(page1.nextCursor).toBeTruthy();
|
||||
|
||||
const page2 = paginateByKeyset(items, 2, decodeKeysetCursor(page1.nextCursor), (item) => item.id);
|
||||
expect(page2.items.map((i) => i.id)).toEqual(['c', 'd']);
|
||||
expect(page2.nextCursor).toBeTruthy();
|
||||
|
||||
const page3 = paginateByKeyset(items, 2, decodeKeysetCursor(page2.nextCursor), (item) => item.id);
|
||||
expect(page3.items.map((i) => i.id)).toEqual(['e']);
|
||||
expect(page3.hasMore).toBe(false);
|
||||
|
||||
const combined = [...page1.items, ...page2.items, ...page3.items].map((i) => i.id);
|
||||
expect(combined).toEqual(['a', 'b', 'c', 'd', 'e']);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
import {
|
||||
detectCaptchaText,
|
||||
extractLinkIdFromUrl,
|
||||
extractUserIdFromUrl,
|
||||
firstNonEmpty,
|
||||
parseCountString,
|
||||
} from '../src/platforms/xiaoheihe/extractors.js';
|
||||
|
||||
describe('xhh extractors', () => {
|
||||
it('parses count strings', () => {
|
||||
expect(parseCountString('123')).toBe(123);
|
||||
expect(parseCountString('1.2万')).toBe(12000);
|
||||
expect(parseCountString('')).toBe(0);
|
||||
});
|
||||
|
||||
it('detects captcha text', () => {
|
||||
expect(detectCaptchaText('show_captcha')).toBe(true);
|
||||
expect(detectCaptchaText('请完成验证码')).toBe(true);
|
||||
expect(detectCaptchaText('normal page')).toBe(false);
|
||||
});
|
||||
|
||||
it('extracts link_id and user_id from url', () => {
|
||||
expect(extractLinkIdFromUrl('https://www.xiaoheihe.cn/app/bbs/link/123456')).toBe('123456');
|
||||
expect(extractLinkIdFromUrl('/app/bbs/link/998877')).toBe('998877');
|
||||
expect(extractUserIdFromUrl('https://www.xiaoheihe.cn/app/user/profile/112233')).toBe('112233');
|
||||
expect(extractUserIdFromUrl('/app/user/profile/778899')).toBe('778899');
|
||||
});
|
||||
|
||||
it('returns first non-empty value', () => {
|
||||
expect(firstNonEmpty('', ' ', 'x', 'y')).toBe('x');
|
||||
expect(firstNonEmpty('', ' ')).toBe('');
|
||||
});
|
||||
});
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { z } from 'zod';
|
||||
|
||||
import {
|
||||
GetFeedDetailSchema,
|
||||
ListFeedsSchema,
|
||||
PostCommentSchema,
|
||||
ReplyCommentSchema,
|
||||
SearchSchema,
|
||||
SetFavoriteStateSchema,
|
||||
SetLikeStateSchema,
|
||||
} from '../src/platforms/xiaoheihe/schemas.js';
|
||||
|
||||
describe('xhh schemas', () => {
|
||||
it('validates list/query boundaries', () => {
|
||||
const schema = z.object(ListFeedsSchema);
|
||||
expect(schema.parse({ max_count: 20 }).max_count).toBe(20);
|
||||
expect(() => schema.parse({ max_count: 0 })).toThrow();
|
||||
expect(() => schema.parse({ max_count: 201 })).toThrow();
|
||||
});
|
||||
|
||||
it('validates search required keyword', () => {
|
||||
const schema = z.object(SearchSchema);
|
||||
expect(() => schema.parse({})).toThrow();
|
||||
expect(schema.parse({ keyword: 'aaa' }).keyword).toBe('aaa');
|
||||
});
|
||||
|
||||
it('allows feed detail by link_id or url', () => {
|
||||
const schema = z.object(GetFeedDetailSchema);
|
||||
expect(schema.parse({ link_id: '123' }).link_id).toBe('123');
|
||||
expect(schema.parse({ url: 'https://www.xiaoheihe.cn/app/bbs/link/123' }).url).toContain('/app/bbs/link/');
|
||||
});
|
||||
|
||||
it('validates comment payloads', () => {
|
||||
const postSchema = z.object(PostCommentSchema);
|
||||
const replySchema = z.object(ReplyCommentSchema);
|
||||
expect(postSchema.parse({ link_id: '1', content: 'hi' }).content).toBe('hi');
|
||||
expect(replySchema.parse({ link_id: '1', comment_id: '2', content: 'ok' }).comment_id).toBe('2');
|
||||
expect(() => postSchema.parse({ link_id: '1', content: '' })).toThrow();
|
||||
});
|
||||
|
||||
it('validates set-state tools', () => {
|
||||
const likeSchema = z.object(SetLikeStateSchema);
|
||||
const favSchema = z.object(SetFavoriteStateSchema);
|
||||
expect(likeSchema.parse({ link_id: '1', liked: true }).liked).toBe(true);
|
||||
expect(favSchema.parse({ link_id: '1', favorited: false }).favorited).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
import { resolveFeedTarget, resolveUserTarget } from '../src/platforms/xiaoheihe/target-resolver.js';
|
||||
|
||||
// Checks that feed and user targets resolve from either an explicit ID or a
// Xiaoheihe URL, and that an empty input is rejected.
describe('xhh target resolver', () => {
  const feedUrl = 'https://www.xiaoheihe.cn/app/bbs/link/123';
  const profileUrl = 'https://www.xiaoheihe.cn/app/user/profile/888';

  it('resolves feed target from link_id', () => {
    const resolved = resolveFeedTarget({ link_id: '123' });
    expect(resolved).toEqual({ linkId: '123' });
  });

  it('resolves feed target from url', () => {
    const resolved = resolveFeedTarget({ url: feedUrl });
    expect(resolved).toEqual({ linkId: '123' });
  });

  it('throws on invalid feed target', () => {
    const attempt = () => resolveFeedTarget({});
    expect(attempt).toThrow();
  });

  it('resolves user target from user_id', () => {
    const resolved = resolveUserTarget({ user_id: '999' });
    expect(resolved).toEqual({ userId: '999' });
  });

  it('resolves user target from url', () => {
    const resolved = resolveUserTarget({ url: profileUrl });
    expect(resolved).toEqual({ userId: '888' });
  });

  it('throws on invalid user target', () => {
    const attempt = () => resolveUserTarget({});
    expect(attempt).toThrow();
  });
});
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"extends": "../../tsconfig.base.json",
|
||||
"compilerOptions": {
|
||||
"outDir": "dist"
|
||||
},
|
||||
"include": ["src"]
|
||||
}
|
||||
@@ -0,0 +1,24 @@
|
||||
import { defineConfig } from 'tsup';
|
||||
|
||||
// Modules listed as `external`: left as runtime imports instead of being bundled.
const externalModules = [
  '@modelcontextprotocol/sdk',
  /^@modelcontextprotocol\/sdk\//,
  'express',
  'pino',
  'pino-pretty',
  'rebrowser-playwright',
  'chromium-bidi/lib/cjs/bidiMapper/BidiMapper',
  'chromium-bidi/lib/cjs/cdp/CdpConnection',
];

// Modules listed as `noExternal`: the workspace core package is inlined into the bundle.
const inlinedModules = [/^@social\/core/];

// Single ESM entry point built from src/main.ts into dist/, with source maps
// and without declaration files, code splitting, or shims.
export default defineConfig({
  entry: ['src/main.ts'],
  format: ['esm'],
  target: 'node22',
  outDir: 'dist',
  noExternal: inlinedModules,
  external: externalModules,
  clean: true,
  sourcemap: true,
  dts: false,
  splitting: false,
  shims: false,
});
|
||||
@@ -0,0 +1,14 @@
|
||||
import { defineConfig } from 'vitest/config';
|
||||
import path from 'node:path';
|
||||
|
||||
// Source directory of the workspace core package; `@social/core` imports are
// aliased here so tests resolve against the sources.
const coreSourceDir = path.resolve(__dirname, '../../packages/core/src');

// Runs every `test/**/*.test.ts` file in a Node environment.
export default defineConfig({
  resolve: {
    alias: { '@social/core': coreSourceDir },
  },
  test: {
    environment: 'node',
    include: ['test/**/*.test.ts'],
  },
});
|
||||
@@ -0,0 +1,29 @@
|
||||
{
|
||||
"name": "@social/xhs-mcp",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"main": "dist/main.js",
|
||||
"bin": {
|
||||
"mcp-xhs": "dist/main.js"
|
||||
},
|
||||
"scripts": {
|
||||
"build": "tsup",
|
||||
"lint": "tsc --noEmit",
|
||||
"test": "vitest run",
|
||||
"start": "PORT=${PORT:-9527} COOKIE_DIR=${COOKIE_DIR:-$HOME/.social-mcp-xhs} node dist/main.js",
|
||||
"dev": "pnpm build && pnpm start"
|
||||
},
|
||||
"dependencies": {
|
||||
"@modelcontextprotocol/sdk": "^1.27.0",
|
||||
"@social/core": "workspace:*",
|
||||
"zod": "^3.25.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^22.0.0",
|
||||
"tsup": "^8.0.0",
|
||||
"typescript": "^5.7.0",
|
||||
"vitest": "^3.0.0"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
import { startServerWithPlugins } from '@social/core/server/bootstrap.js';
|
||||
import { xiaohongshuPlugin } from './platforms/xiaohongshu/index.js';
|
||||
|
||||
startServerWithPlugins([xiaohongshuPlugin]);
|
||||
|
||||
@@ -0,0 +1,478 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '@social/core/utils/logger.js';
|
||||
import { XHS_SELECTORS } from './selectors.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const FEED_DETAIL_URL = 'https://www.xiaohongshu.com/explore';
|
||||
|
||||
/** Wait after typing comment text before submitting. */
|
||||
const TYPE_SETTLE_MS = 500;
|
||||
|
||||
/** Wait after submit click to verify success. */
|
||||
const SUBMIT_SETTLE_MS = 2_000;
|
||||
|
||||
const selComment = XHS_SELECTORS.comment;
|
||||
const selDetail = XHS_SELECTORS.feedDetail;
|
||||
|
||||
const log = logger.child({ module: 'xhs-comment' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// postComment
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Post a top-level comment on a Xiaohongshu note by driving the page UI.
 *
 * Steps: open the note page, locate the comment input, type the text, submit,
 * then verify. Verification is best-effort: the post counts as successful when
 * the first 20 characters of `content` appear in the serialized page HTML, or
 * when a comment with matching text and a non-empty ID can be located.
 *
 * @param page - Playwright page used for navigation and interaction.
 * @param feedId - ID of the note to comment on.
 * @param xsecToken - Token appended to the note URL.
 * @param content - Comment text to type.
 * @returns `success`, plus `comment_id` when an ID could be extracted.
 */
export async function postComment(
  page: Page,
  feedId: string,
  xsecToken: string,
  content: string,
): Promise<{ success: boolean; comment_id?: string }> {
  log.info({ feedId }, 'Posting comment on note');

  // Open the note and give the container time to render.
  await page.goto(buildFeedUrl(feedId, xsecToken), { waitUntil: 'domcontentloaded' });
  await page.waitForSelector(selDetail.noteContainer, { timeout: 10_000 });
  await page.waitForTimeout(1_000);

  // Locate the input; without it nothing can be typed.
  const inputHandle = await findCommentInput(page);
  if (inputHandle === null) {
    log.warn('Comment input not found on feed detail page');
    return { success: false };
  }

  // Focus the input and type with a per-key delay.
  await inputHandle.click();
  await page.waitForTimeout(300);
  await page.keyboard.type(content, { delay: 30 });
  await page.waitForTimeout(TYPE_SETTLE_MS);

  // Submit.
  const didSubmit = await submitComment(page);
  if (!didSubmit) {
    log.warn('Failed to submit comment — submit button not found or click failed');
    return { success: false };
  }

  // Let the page settle, then look for evidence that the comment exists.
  await page.waitForTimeout(SUBMIT_SETTLE_MS);
  const html = await page.content();
  const prefixVisible = html.includes(content.slice(0, 20));
  const commentId = await extractTopLevelCommentId(page, content);
  const success = prefixVisible || Boolean(commentId);

  log.info({ feedId, success, commentId }, 'Comment post complete');

  const outcome: { success: boolean; comment_id?: string } = { success };
  if (commentId) {
    outcome.comment_id = commentId;
  }
  return outcome;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// replyComment
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Reply to an existing comment on a Xiaohongshu note by driving the page UI.
 *
 * Target selection:
 *  - With `commentId`: only that comment's reply button is used. If the
 *    comment element or its reply button cannot be found, the reply is NOT
 *    redirected to some other comment's thread; the function falls through to
 *    the main comment input instead.
 *  - Without `commentId`: the first comment item on the page is used.
 *  - If no reply button was activated, the text is typed into the main comment
 *    input, prefixed with `@userId ` when `userId` is provided.
 *
 * Verification is best-effort: success means the first 20 characters of
 * `content` appear in the serialized page HTML, or a matching reply with a
 * non-empty ID was located.
 *
 * @param page - Playwright page used for navigation and interaction.
 * @param feedId - ID of the note.
 * @param xsecToken - Token appended to the note URL.
 * @param content - Reply text to type.
 * @param commentId - Optional ID of the comment to reply to.
 * @param userId - Optional user ID used for the `@` prefix in the fallback path.
 * @returns `success`, plus `reply_id` when an ID could be extracted.
 */
export async function replyComment(
  page: Page,
  feedId: string,
  xsecToken: string,
  content: string,
  commentId?: string,
  userId?: string,
): Promise<{ success: boolean; reply_id?: string }> {
  log.info({ feedId, commentId, userId }, 'Replying to comment on note');

  // 1. Navigate to the feed detail page.
  const feedUrl = buildFeedUrl(feedId, xsecToken);
  await page.goto(feedUrl, { waitUntil: 'domcontentloaded' });
  await page.waitForSelector(selDetail.noteContainer, { timeout: 10_000 });
  await page.waitForTimeout(1_000);

  // 2. Activate a reply input on the intended comment.
  let replyInputFound = false;

  if (commentId) {
    // A specific comment was requested: never substitute a different one,
    // otherwise the reply would be posted into an unrelated thread.
    replyInputFound = await clickReplyOnComment(page, commentId);
  } else {
    // No target given: use the first comment on the page.
    const commentItems = await page.$$(selComment.commentItem);
    const firstComment = commentItems[0];

    if (firstComment) {
      // Hover first; the reply button may only be shown on hover.
      await firstComment.hover();
      await page.waitForTimeout(300);

      const replyBtn = await firstComment.$(selComment.commentReplyButton);
      if (replyBtn) {
        await replyBtn.click();
        await page.waitForTimeout(500);
        replyInputFound = true;
      }
    }
  }

  if (!replyInputFound) {
    log.debug('Reply button not found, falling back to main comment input');
  }

  // 3. Type the reply: prefer the reply-specific input, else the main input.
  const replyInput =
    (await page.$(selComment.replyInput)) ??
    (await findCommentInput(page));

  if (!replyInput) {
    log.warn('Reply input not found');
    return { success: false };
  }

  await replyInput.click();
  await page.waitForTimeout(300);

  // When falling back to the main input, address the user explicitly.
  const textToType =
    !replyInputFound && userId ? `@${userId} ${content}` : content;

  await page.keyboard.type(textToType, { delay: 30 });
  await page.waitForTimeout(TYPE_SETTLE_MS);

  // 4. Submit the reply.
  const submitted = await submitComment(page);
  if (!submitted) {
    log.warn('Failed to submit reply');
    return { success: false };
  }

  // 5. Verify (best-effort).
  await page.waitForTimeout(SUBMIT_SETTLE_MS);

  const pageContent = await page.content();
  const textHit = pageContent.includes(content.slice(0, 20));
  const replyId = await extractReplyCommentId(page, content, commentId);
  const success = textHit || !!replyId;

  log.info({ feedId, commentId, success, replyId }, 'Reply post complete');

  return {
    success,
    ...(replyId ? { reply_id: replyId } : {}),
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Build the URL of a note's detail page.
 *
 * Both the note ID (path segment) and the token (query value) are
 * percent-encoded so that characters such as `/`, `?`, `#` or `&` in either
 * argument cannot alter the structure of the resulting URL.
 *
 * @param feedId - Note ID placed in the URL path.
 * @param xsecToken - Token placed in the `xsec_token` query parameter.
 * @returns The detail-page URL with `xsec_source=pc_search`.
 */
function buildFeedUrl(feedId: string, xsecToken: string): string {
  const pathSegment = encodeURIComponent(feedId);
  const token = encodeURIComponent(xsecToken);
  return `${FEED_DETAIL_URL}/${pathSegment}?xsec_token=${token}&xsec_source=pc_search`;
}
|
||||
|
||||
/**
 * Locate the main comment input element.
 *
 * Tries the primary selector and then the alternative one. If neither matches,
 * clicks the comment area (when present), waits briefly, and tries both
 * selectors once more. Resolves to `null` when no input is found.
 */
async function findCommentInput(page: Page) {
  // Primary selector first, alternative selector second.
  const probe = async () =>
    (await page.$(selComment.commentInput)) ??
    (await page.$(selComment.commentInputAlt));

  const immediate = await probe();
  if (immediate) return immediate;

  // The input may only appear after the comment zone has been clicked.
  const zone = await page.$('.comment-area, .comments-container');
  if (!zone) return null;

  await zone.click();
  await page.waitForTimeout(500);

  return probe();
}
|
||||
|
||||
/**
 * Find a specific comment element by ID and click its reply button.
 *
 * The comment is looked up by `id="comment-<id>"` first, then by
 * `data-comment-id="<id>"`. The ID is escaped before being placed inside the
 * quoted attribute selectors so that a `"` or `\` in the value cannot produce
 * an invalid selector (which would make the lookup throw).
 *
 * @param page - Playwright page showing the note.
 * @param commentId - ID of the comment to reply to.
 * @returns `true` when the reply button was clicked, `false` when the comment
 *   element or its reply button was not found.
 */
async function clickReplyOnComment(
  page: Page,
  commentId: string,
): Promise<boolean> {
  // Escape characters that would terminate or corrupt a quoted CSS string.
  const safeId = commentId.replace(/["\\]/g, '\\$&');

  const commentEl =
    (await page.$(`[id="comment-${safeId}"]`)) ??
    (await page.$(`[data-comment-id="${safeId}"]`));

  if (!commentEl) {
    log.debug({ commentId }, 'Target comment element not found by ID');
    return false;
  }

  // Hover first; the reply button may only be shown on hover.
  await commentEl.hover();
  await page.waitForTimeout(300);

  const replyBtn = await commentEl.$(selComment.commentReplyButton);
  if (!replyBtn) {
    log.debug({ commentId }, 'Reply button not found on target comment');
    return false;
  }

  await replyBtn.click();
  await page.waitForTimeout(500);

  return true;
}
|
||||
|
||||
/**
 * Submit the text currently typed into the comment/reply input.
 *
 * - If no submit button is found, presses `Control+Enter` and reports `true`
 *   (best-effort; whether the shortcut worked is decided by the caller's
 *   verification step).
 * - If the button is disabled, waits one second and looks again. When the
 *   button has disappeared or is still disabled, returns `false` instead of
 *   clicking: a click on a disabled element would wait for it to become
 *   enabled and eventually throw on timeout.
 *
 * @param page - Playwright page showing the note.
 * @returns `true` when a submit action was issued, `false` otherwise.
 */
async function submitComment(page: Page): Promise<boolean> {
  let submitBtn = await page.$(selComment.commentSubmitButton);

  if (!submitBtn) {
    // Some UIs submit on Ctrl+Enter.
    log.debug('Submit button not found, trying keyboard shortcut');
    await page.keyboard.press('Control+Enter');
    return true;
  }

  // A failed state query is treated as "not disabled" so we still try to click.
  const isDisabled = await submitBtn.isDisabled().catch(() => false);
  if (isDisabled) {
    log.debug('Submit button is disabled, waiting briefly');
    await page.waitForTimeout(1_000);

    // Re-query in case the element was re-rendered.
    submitBtn = await page.$(selComment.commentSubmitButton);
    if (!submitBtn) return false;

    const stillDisabled = await submitBtn.isDisabled().catch(() => false);
    if (stillDisabled) {
      log.debug('Submit button still disabled after waiting');
      return false;
    }
  }

  await submitBtn.click();
  return true;
}
|
||||
|
||||
/**
 * Collapse every run of whitespace into a single space and strip leading and
 * trailing whitespace.
 */
function normalizeText(input: string): string {
  const collapsed = input.replace(/\s+/g, ' ');
  return collapsed.trim();
}
|
||||
|
||||
/**
 * Turn an element attribute value into a bare comment ID: a leading
 * `comment-` prefix is removed and the result is trimmed. `null` and the
 * empty string both yield `''`.
 */
function parseCommentElementId(value: string | null): string {
  return value ? value.replace(/^comment-/, '').trim() : '';
}
|
||||
|
||||
/**
 * Loose text comparison used to recognise a just-posted comment.
 *
 * Both strings are whitespace-normalized first. They match when neither is
 * empty and one contains the other (equality included).
 */
function matchContent(candidate: string, target: string): boolean {
  const seen = normalizeText(candidate);
  const wanted = normalizeText(target);

  if (seen === '' || wanted === '') {
    return false;
  }
  // Equal strings contain each other, so containment also covers equality.
  return seen.includes(wanted) || wanted.includes(seen);
}
|
||||
|
||||
/**
 * Best-effort lookup of the ID of a top-level comment whose text matches
 * `content`.
 *
 * Source 1: `window.__INITIAL_STATE__.note.noteDetailMap[*].comments.list`,
 * read inside the page. Any failure of that evaluation is treated as "not
 * found". Source 2: comment elements in the DOM, using their `id`,
 * `data-comment-id` or `data-id` attribute (first non-empty wins).
 *
 * @returns The first matching non-empty ID, or `undefined`.
 */
async function extractTopLevelCommentId(
  page: Page,
  content: string,
): Promise<string | undefined> {
  const storeId = await page
    .evaluate((targetContent: string) => {
      // Executed in the browser context: helpers must be defined inline.
      const squash = (raw: unknown): string =>
        String(raw ?? '').replace(/\s+/g, ' ').trim();
      const wanted = squash(targetContent);
      const sameText = (raw: unknown): boolean => {
        const seen = squash(raw);
        if (!seen || !wanted) return false;
        return seen === wanted || seen.includes(wanted) || wanted.includes(seen);
      };
      const stripPrefix = (raw: unknown): string =>
        String(raw ?? '').replace(/^comment-/, '').trim();

      const root = (window as unknown as Record<string, unknown>).__INITIAL_STATE__ as
        Record<string, unknown> | undefined;
      const noteStore = root?.note as Record<string, unknown> | undefined;
      const detailMap = noteStore?.noteDetailMap as
        Record<string, Record<string, unknown>> | undefined;
      if (!detailMap) return null;

      for (const detail of Object.values(detailMap)) {
        const bucket = detail?.comments as
          { list?: Array<Record<string, unknown>> } | undefined;
        if (!bucket?.list) continue;

        for (const item of bucket.list) {
          if (!sameText(item.content)) continue;
          const id = stripPrefix(item.id);
          if (id) return id;
        }
      }
      return null;
    }, content)
    .catch(() => null);

  if (typeof storeId === 'string' && storeId) {
    return storeId;
  }

  // DOM fallback: scan comment elements for matching text.
  const nodes = await page.$$('.parent-comment .comment-item, [id^="comment-"]');
  for (const node of nodes) {
    const text = await node
      .$eval('.content', (el) => el.textContent ?? '')
      .catch(() => '');
    if (!matchContent(text, content)) continue;

    // Attributes are read lazily, in priority order.
    for (const attr of ['id', 'data-comment-id', 'data-id']) {
      const id = parseCommentElementId(
        await node.getAttribute(attr).catch(() => null),
      );
      if (id) return id;
    }
  }

  return undefined;
}
|
||||
|
||||
/**
 * Best-effort lookup of the ID of a reply (sub-comment) whose text matches
 * `content`.
 *
 * Source 1: sub-comments (`subComments` / `sub_comments`) of the entries in
 * `window.__INITIAL_STATE__.note.noteDetailMap[*].comments.list`, restricted
 * to the parent with `parentCommentId` when one is given. Any failure of that
 * evaluation is treated as "not found".
 * Source 2: the DOM, searching inside the parent comment element(s).
 *
 * `parentCommentId` is escaped before being placed in the quoted attribute
 * selectors, so an ID containing `"` or `\` cannot make the query throw after
 * the reply has already been posted.
 *
 * @param page - Playwright page showing the note.
 * @param content - Text of the reply to look for.
 * @param parentCommentId - Optional ID of the comment that was replied to.
 * @returns The first matching non-empty ID, or `undefined`.
 */
async function extractReplyCommentId(
  page: Page,
  content: string,
  parentCommentId?: string,
): Promise<string | undefined> {
  const fromStore = await page.evaluate(
    (args: { targetContent: string; parentCommentId?: string }) => {
      // Executed in the browser context: helpers must be defined inline.
      const match = (candidate: unknown): boolean => {
        const c = String(candidate ?? '').replace(/\s+/g, ' ').trim();
        const t = String(args.targetContent ?? '').replace(/\s+/g, ' ').trim();
        if (!c || !t) return false;
        return c === t || c.includes(t) || t.includes(c);
      };
      const normalizeId = (id: unknown): string =>
        String(id ?? '').replace(/^comment-/, '').trim();

      const state = (window as unknown as Record<string, unknown>).__INITIAL_STATE__ as
        Record<string, unknown> | undefined;
      const note = state?.note as Record<string, unknown> | undefined;
      const map = note?.noteDetailMap as Record<string, Record<string, unknown>> | undefined;
      if (!map) return null;

      for (const entry of Object.values(map)) {
        const comments = entry?.comments as { list?: Array<Record<string, unknown>> } | undefined;
        if (!comments?.list) continue;
        for (const parent of comments.list) {
          const parentId = normalizeId(parent.id);
          // Skip other threads when a specific parent was requested.
          if (args.parentCommentId && parentId !== args.parentCommentId) continue;

          const subs = (parent.subComments ?? parent.sub_comments ?? []) as Array<Record<string, unknown>>;
          for (const sub of subs) {
            if (match(sub.content)) {
              const id = normalizeId(sub.id);
              if (id) return id;
            }
          }
        }
      }
      return null;
    },
    { targetContent: content, parentCommentId },
  ).catch(() => null);

  if (typeof fromStore === 'string' && fromStore) {
    return fromStore;
  }

  // Escape characters that would terminate or corrupt a quoted CSS string.
  const safeParentId = parentCommentId?.replace(/["\\]/g, '\\$&');

  const parentCandidates = safeParentId
    ? await page.$$(
        `[id="comment-${safeParentId}"], [data-comment-id="${safeParentId}"], [data-id="${safeParentId}"]`,
      )
    : await page.$$('.parent-comment');

  for (const parent of parentCandidates) {
    const replyItems = await parent.$$('.sub-comment-item, [id^="comment-"]');
    for (const item of replyItems) {
      // Prefer the `.content` child; fall back to the item's whole text.
      const text = await item
        .$eval('.content', (el) => el.textContent ?? '')
        .catch(() => item.textContent().catch(() => ''));
      if (!matchContent(text ?? '', content)) continue;

      const id = parseCommentElementId(
        await item.getAttribute('id').catch(() => null),
      ) || parseCommentElementId(
        await item.getAttribute('data-comment-id').catch(() => null),
      ) || parseCommentElementId(
        await item.getAttribute('data-id').catch(() => null),
      );
      if (id) return id;
    }
  }

  return undefined;
}
|
||||
@@ -0,0 +1,833 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '@social/core/utils/logger.js';
|
||||
import { XHS_SELECTORS } from './selectors.js';
|
||||
import { extractInitialState, parseCountString, ensureHttps } from './feeds.js';
|
||||
import type { FeedDetail, Comment } from './types.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const FEED_DETAIL_BASE_URL = 'https://www.xiaohongshu.com/explore';
|
||||
|
||||
/** Maximum number of "show more" clicks to load comments (safety limit). */
|
||||
const MAX_LOAD_MORE_CLICKS = 50;
|
||||
|
||||
/** Delay between "show more" clicks to let the page render. */
|
||||
const LOAD_MORE_DELAY_MS = 1500;
|
||||
|
||||
const SEL = XHS_SELECTORS.feedDetail;
|
||||
|
||||
const log = logger.child({ module: 'xhs-feed-detail' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// __INITIAL_STATE__ raw types for feed detail
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Loosely typed view of the page's `__INITIAL_STATE__` for a note detail page.
 * Several alternative locations of the note payload are declared; all of them
 * are optional, and unknown extra keys are allowed.
 */
interface RawDetailState {
  /** Note payload nested under `noteData`, directly or via `data`. */
  noteData?: {
    data?: { noteData?: RawNoteData };
    noteData?: RawNoteData;
  };
  /** Note payload nested under `note`. */
  note?: {
    // NOTE(review): other code reading this store treats `comments` as an
    // object with a `list` array rather than a plain array — confirm the shape.
    noteDetailMap?: Record<string, { note?: RawNoteData; comments?: RawCommentData[] }>;
    note?: RawNoteData;
    noteData?: RawNoteData;
  };
  [key: string]: unknown;
}
|
||||
|
||||
/**
 * Raw note payload. Most fields are declared in both camelCase and snake_case
 * spellings; every field is optional.
 */
interface RawNoteData {
  // Identity
  noteId?: string;
  id?: string;

  // Text
  title?: string;
  desc?: string;
  description?: string;
  type?: string;

  // Media
  imageList?: RawNoteImage[];
  image_list?: RawNoteImage[];
  video?: RawNoteVideo;

  // Tags
  tagList?: RawNoteTag[];
  tag_list?: RawNoteTag[];

  // Interaction counters
  interactInfo?: RawNoteInteract;
  interact_info?: RawNoteInteract;

  // Timestamps
  time?: number;
  createTime?: number;
  create_time?: number;
  lastUpdateTime?: number;
  last_update_time?: number;

  // Location
  ipLocation?: string;
  ip_location?: string;

  // Author
  user?: RawNoteUser;

  // Access token
  xsecToken?: string;
  xsec_token?: string;

  // Comments embedded in the note payload
  comments?: RawCommentData[];
}
|
||||
|
||||
/** Raw image entry of a note; every URL variant is optional. */
interface RawNoteImage {
  url?: string;
  urlPre?: string;
  url_pre?: string;
  urlDefault?: string;
  url_default?: string;
  infoList?: { url?: string }[];
  info_list?: { url?: string }[];
}
|
||||
|
||||
/**
 * Raw video entry of a note. A URL may be present directly, under `consumer`
 * as a video key, or under `media` (H.264 stream list or `video.url`).
 */
interface RawNoteVideo {
  url?: string;
  consumer?: {
    originVideoKey?: string;
    origin_video_key?: string;
  };
  media?: {
    stream?: {
      h264?: { masterUrl?: string; master_url?: string }[];
    };
    video?: { url?: string };
  };
}
|
||||
|
||||
/** Raw tag entry of a note. */
interface RawNoteTag {
  id?: string;
  name?: string;
  type?: string;
}
|
||||
|
||||
/**
 * Raw interaction counters of a note. Counts are typed as strings, in both
 * camelCase and snake_case spellings.
 */
interface RawNoteInteract {
  likedCount?: string;
  liked_count?: string;
  collectedCount?: string;
  collected_count?: string;
  commentCount?: string;
  comment_count?: string;
}
|
||||
|
||||
/** Raw author entry of a note, with the spelling variants seen for each field. */
interface RawNoteUser {
  // User ID
  userId?: string;
  user_id?: string;

  // Display name
  nickname?: string;
  nick_name?: string;
  nickName?: string;

  // Avatar URL
  avatar?: string;
  avatarUrl?: string;
  avatar_url?: string;
}
|
||||
|
||||
/**
 * Raw comment entry. The same shape is used recursively for replies via
 * `subComments` / `sub_comments`. Counts may arrive as numbers or strings.
 */
interface RawCommentData {
  id?: string;

  // Author, either flat or nested under `userInfo`
  userId?: string;
  user_id?: string;
  userInfo?: {
    userId?: string;
    user_id?: string;
    nickname?: string;
    nick_name?: string;
    image?: string;
    avatar?: string;
  };

  content?: string;

  likeCount?: number | string;
  like_count?: number | string;

  createTime?: number;
  create_time?: number;

  ipLocation?: string;
  ip_location?: string;

  // Replies
  subComments?: RawCommentData[];
  sub_comments?: RawCommentData[];
  subCommentCount?: number | string;
  sub_comment_count?: number | string;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// getFeedDetail
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Navigate to a Xiaohongshu note detail page and extract its details.
 *
 * Extraction order:
 *  1. Parse the page's `__INITIAL_STATE__`.
 *  2. If that yields nothing, scrape the DOM.
 *  3. If the result has no comments, read them from the page store.
 *  4. Read the current user's like / favorite state from the
 *     `.interact-container` icons (`#liked` / `#collected`).
 *
 * The note ID is percent-encoded when building the URL so that characters such
 * as `/`, `?` or `#` in `feedId` cannot alter the URL structure.
 *
 * Use {@link getSubComments} to load more replies for a specific comment.
 *
 * @param page - Playwright page used for navigation and extraction.
 * @param feedId - The note (feed) ID.
 * @param xsecToken - Token appended to the note URL.
 * @returns The extracted note detail, including like / favorite flags.
 */
export async function getFeedDetail(
  page: Page,
  feedId: string,
  xsecToken: string,
): Promise<FeedDetail> {
  const url = `${FEED_DETAIL_BASE_URL}/${encodeURIComponent(feedId)}?xsec_token=${encodeURIComponent(xsecToken)}&xsec_source=pc_feed`;
  log.debug({ feedId, url }, 'Navigating to feed detail page');

  await page.goto(url, { waitUntil: 'domcontentloaded' });

  // Wait for any of the note content elements; a timeout is tolerated and
  // extraction is attempted anyway.
  await page.waitForSelector(
    `${SEL.noteContainer}, ${SEL.title}, ${SEL.description}`,
    { timeout: 15_000 },
  ).catch(() => {
    log.warn({ feedId }, 'Note container not found within timeout, proceeding with extraction');
  });

  // Allow render to settle.
  await page.waitForTimeout(1500);

  // Strategy 1: extract from __INITIAL_STATE__.
  const initialState = await extractInitialState(page) as RawDetailState | null;
  let detail: FeedDetail | null = null;

  if (initialState) {
    detail = parseDetailFromState(initialState, feedId, xsecToken);
    if (detail) {
      log.debug({ feedId }, 'Extracted feed detail from __INITIAL_STATE__');
    }
  }

  // Strategy 2: fall back to DOM scraping.
  if (!detail) {
    log.debug({ feedId }, 'Falling back to DOM scraping for feed detail');
    detail = await scrapeDetailFromDom(page, feedId, xsecToken);
  }

  // Comments are loaded asynchronously after render; read them from the
  // page store when the detail does not carry any yet.
  if (detail.comments.length === 0) {
    const storeComments = await extractCommentsFromStore(page, feedId);
    if (storeComments.length > 0) {
      detail.comments = storeComments;
      log.debug({ feedId, count: storeComments.length }, 'Extracted comments from Vue store');
    }
  }

  // The like / favorite buttons are rendered asynchronously: wait up to 3s
  // for them (ignoring a timeout), plus extra time for the state to update.
  await page.waitForSelector('.interact-container .like-wrapper', { timeout: 3_000 }).catch(() => {});
  await page.waitForTimeout(1_000);

  // The icon's xlink:href tells the state: #like vs #liked, #collect vs #collected.
  // A missing icon compares unequal and therefore yields `false`.
  const interactionState = await page.evaluate(() => {
    const likeIcon = document.querySelector('.interact-container .like-wrapper use');
    const favIcon = document.querySelector('.interact-container .collect-wrapper use');
    return {
      isLiked: likeIcon?.getAttribute('xlink:href') === '#liked',
      isFavorited: favIcon?.getAttribute('xlink:href') === '#collected',
    };
  });
  detail.isLiked = interactionState.isLiked;
  detail.isFavorited = interactionState.isFavorited;

  log.info(
    { feedId, imageCount: detail.images.length, isLiked: detail.isLiked, isFavorited: detail.isFavorited },
    'Feed detail extraction complete',
  );

  return detail;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// getSubComments
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Load the replies (sub-comments) of one top-level comment on a note page.
 *
 * Flow: open the note detail URL, wait for the note container and for the
 * comment store's first request, find the `.parent-comment` wrapper whose
 * `.comment-item` carries `commentId`, then click that thread's `.show-more`
 * button until the store holds at least `maxCount` replies, the button is
 * missing or hidden, or `MAX_LOAD_MORE_CLICKS` clicks were made. The replies
 * are finally read back from the page store, not from the DOM.
 *
 * @param page - Playwright page used for navigation and evaluation.
 * @param feedId - ID of the note (feed).
 * @param xsecToken - Token sent as the `xsec_token` query parameter.
 * @param commentId - ID of the parent comment whose replies are wanted.
 * @param maxCount - Stop loading once this many replies are available (default 20).
 * @returns The parsed sub-comments, capped at `maxCount`.
 */
export async function getSubComments(
  page: Page,
  feedId: string,
  xsecToken: string,
  commentId: string,
  maxCount = 20,
): Promise<Comment[]> {
  const encodedToken = encodeURIComponent(xsecToken);
  const url = `${FEED_DETAIL_BASE_URL}/${feedId}?xsec_token=${encodedToken}&xsec_source=pc_feed`;
  log.debug({ feedId, commentId, url, maxCount }, 'Navigating to feed page for sub-comments');

  await page.goto(url, { waitUntil: 'domcontentloaded' });

  // A missing container is tolerated: the store poll below covers timing.
  await page.waitForSelector(SEL.noteContainer, { timeout: 15_000 }).catch(() => {
    log.warn({ feedId }, 'Note container not found within timeout, proceeding');
  });

  await waitForCommentsStoreReady(page, feedId);

  // Resolve the position of the owning wrapper in the browser, then fetch a
  // real ElementHandle for it on the Node side (needed for $ / isVisible).
  const parentIndex = await page.evaluate((targetId: string) => {
    const wrappers = Array.from(document.querySelectorAll('.parent-comment'));
    return wrappers.findIndex((wrapper) => {
      const item = wrapper.querySelector('.comment-item');
      if (!item) return false;
      const domId =
        item.getAttribute('id')?.replace(/^comment-/, '') ??
        item.getAttribute('data-id') ??
        item.getAttribute('data-comment-id') ??
        '';
      return domId === targetId;
    });
  }, commentId);

  let clicks = 0;

  if (parentIndex < 0) {
    log.warn({ feedId, commentId }, 'Target parent-comment not found in DOM');
  } else {
    const wrappers = await page.$$('.parent-comment');
    const thread = wrappers[parentIndex];

    if (thread) {
      // Bring the thread into view before interacting with it.
      await thread.scrollIntoViewIfNeeded().catch(() => {});
      await page.waitForTimeout(300);

      for (; clicks < MAX_LOAD_MORE_CLICKS; clicks++) {
        // Enough replies already in the store? Then stop clicking.
        const loaded = await getStoreSubCommentCount(page, feedId, commentId);
        if (loaded >= maxCount) break;

        // The "load more replies" control lives inside this thread only.
        const button = await thread.$('.show-more').catch(() => null);
        if (!button) break;

        const visible = await button.isVisible().catch(() => false);
        if (!visible) break;

        await button.click().catch(() => {});
        await page.waitForTimeout(LOAD_MORE_DELAY_MS);
      }
    }
  }

  if (clicks > 0) {
    log.debug({ commentId, clicks }, 'Clicked "load more replies" button');
  }

  // Read the replies for this specific comment from the page store.
  const subComments = await extractSubCommentsFromStore(page, feedId, commentId, maxCount);

  log.info(
    { feedId, commentId, subCommentCount: subComments.length, maxCount },
    'Sub-comments extraction complete',
  );

  return subComments;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// __INITIAL_STATE__ parsing
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Build a FeedDetail from the page's `__INITIAL_STATE__` snapshot.
 *
 * The note record is looked up in several locations, in this order:
 * `noteData.data.noteData`, `noteData.noteData`,
 * `note.noteDetailMap[feedId].note`, then `note.note` / `note.noteData`.
 * Both camelCase and snake_case field spellings are accepted.
 *
 * Comments may arrive either as a plain array or as the store object
 * `{ list: [...] }` kept under `noteDetailMap[feedId].comments`; both shapes
 * are normalised to an array. Timestamps that cannot be parsed become `''`
 * instead of raising a RangeError from `toISOString()`.
 *
 * `isLiked` / `isFavorited` are always returned as `false` from here.
 *
 * @param state - Parsed `__INITIAL_STATE__` subset.
 * @param feedId - Note ID, used for the map lookup and as fallback `id`.
 * @param xsecToken - Fallback token when the note record carries none.
 * @returns The parsed detail, or `null` when no note record was found.
 */
function parseDetailFromState(
  state: RawDetailState,
  feedId: string,
  xsecToken: string,
): FeedDetail | null {
  // Numbers below 1e12 are treated as seconds, everything else is handed to
  // Date as-is. Invalid dates yield '' rather than throwing.
  const toIsoTimestamp = (raw: unknown): string => {
    if (!raw) return '';
    if (typeof raw !== 'number' && typeof raw !== 'string') return '';
    const date = new Date(typeof raw === 'number' && raw < 1e12 ? raw * 1000 : raw);
    return Number.isNaN(date.getTime()) ? '' : date.toISOString();
  };

  // Accept both `Comment[]` and the store shape `{ list: Comment[] }`.
  const asCommentArray = (value: unknown): RawCommentData[] | undefined => {
    if (Array.isArray(value)) return value as RawCommentData[];
    if (value && typeof value === 'object') {
      const list = (value as { list?: unknown }).list;
      if (Array.isArray(list)) return list as RawCommentData[];
    }
    return undefined;
  };

  let noteData: RawNoteData | undefined;
  let mapComments: unknown;

  // Location 1: state.noteData.data.noteData (common structure)
  noteData = state.noteData?.data?.noteData;

  // Location 2: state.noteData.noteData
  if (!noteData) {
    noteData = state.noteData?.noteData;
  }

  // Location 3: state.note.noteDetailMap[feedId] — note + comments are siblings
  if (!noteData && state.note?.noteDetailMap) {
    const mapEntry = state.note.noteDetailMap[feedId];
    noteData = mapEntry?.note;
    mapComments = mapEntry?.comments;
  }

  // Location 4: state.note.note or state.note.noteData
  if (!noteData) {
    noteData = state.note?.note ?? state.note?.noteData;
  }

  if (!noteData) {
    return null;
  }

  const id = noteData.noteId ?? noteData.id ?? feedId;
  const title = noteData.title ?? '';
  const description = noteData.desc ?? noteData.description ?? '';
  const rawType = noteData.type ?? '';
  const type: 'normal' | 'video' =
    rawType.toLowerCase().includes('video') ? 'video' : 'normal';

  // Images: first non-empty direct URL wins, then the first info-list entry.
  const rawImages = noteData.imageList ?? noteData.image_list ?? [];
  const images = rawImages
    .map((img) => {
      const direct = img.url || img.urlPre || img.urlDefault || img.url_pre || img.url_default;
      if (direct) return ensureHttps(direct);
      const info = img.infoList ?? img.info_list;
      if (info && info.length > 0 && info[0]?.url) return ensureHttps(info[0].url);
      return '';
    })
    .filter((url) => url !== '');

  // Video URL
  let videoUrl: string | undefined;
  if (noteData.video) {
    const v = noteData.video;
    videoUrl =
      v.url ??
      v.media?.stream?.h264?.[0]?.masterUrl ??
      v.media?.stream?.h264?.[0]?.master_url ??
      v.media?.video?.url ??
      undefined;
    if (videoUrl) videoUrl = ensureHttps(videoUrl);
  }

  // Tags (entries without a name are dropped)
  const rawTags = noteData.tagList ?? noteData.tag_list ?? [];
  const tags = rawTags
    .map((t) => t.name ?? '')
    .filter((name) => name !== '');

  // Interaction stats
  const interact = noteData.interactInfo ?? noteData.interact_info;
  const likeCount = parseCountString(
    interact?.likedCount ?? interact?.liked_count ?? '0',
  );
  const collectCount = parseCountString(
    interact?.collectedCount ?? interact?.collected_count ?? '0',
  );
  const commentCount = parseCountString(
    interact?.commentCount ?? interact?.comment_count ?? '0',
  );

  // Timestamps
  const createTime = toIsoTimestamp(
    noteData.time ?? noteData.createTime ?? noteData.create_time,
  );
  const lastUpdateTime = toIsoTimestamp(
    noteData.lastUpdateTime ?? noteData.last_update_time,
  );

  const ipLocation = noteData.ipLocation ?? noteData.ip_location ?? '';

  // User
  const rawUser = noteData.user;
  const user = {
    id: rawUser?.userId ?? rawUser?.user_id ?? '',
    nickname: rawUser?.nickname ?? rawUser?.nick_name ?? rawUser?.nickName ?? '',
    avatar: rawUser?.avatar ?? rawUser?.avatarUrl ?? rawUser?.avatar_url ?? '',
  };

  // Comments: prefer map-level comments (noteDetailMap[id].comments),
  // fall back to noteData.comments.
  const rawComments = asCommentArray(mapComments) ?? asCommentArray(noteData.comments) ?? [];
  const comments = rawComments.map(parseRawComment).filter((c): c is Comment => c !== null);

  const resolvedXsecToken = noteData.xsecToken ?? noteData.xsec_token ?? xsecToken;

  return {
    id,
    xsecToken: resolvedXsecToken,
    title,
    description,
    type,
    images,
    videoUrl,
    tags,
    likeCount,
    collectCount,
    commentCount,
    isLiked: false,
    isFavorited: false,
    createTime,
    lastUpdateTime,
    ipLocation,
    user,
    comments,
  };
}
|
||||
|
||||
/**
 * Convert one raw comment record into a Comment.
 *
 * Accepts camelCase and snake_case field spellings. Nested replies
 * (`subComments` / `sub_comments`) are parsed recursively, and replies
 * without an `id` are dropped. A `createTime` that cannot be parsed becomes
 * `''` instead of raising a RangeError from `toISOString()`.
 *
 * @param raw - Raw comment record taken from the page state.
 * @returns The parsed comment, or `null` when the record has no `id`.
 */
function parseRawComment(raw: RawCommentData): Comment | null {
  const id = raw.id ?? '';
  if (!id) return null;

  const userInfo = raw.userInfo;
  const userId = raw.userId ?? raw.user_id ?? userInfo?.userId ?? userInfo?.user_id ?? '';
  const nickname = userInfo?.nickname ?? userInfo?.nick_name ?? '';
  const avatar = userInfo?.image ?? userInfo?.avatar ?? '';
  const content = raw.content ?? '';

  // Like counts may be numeric or an abbreviated string.
  const likeCountRaw = raw.likeCount ?? raw.like_count ?? 0;
  const likeCount = typeof likeCountRaw === 'string'
    ? parseCountString(likeCountRaw)
    : likeCountRaw;

  // Numbers below 1e12 are treated as seconds; anything else is handed to
  // Date unchanged. An invalid date leaves createTime empty.
  const createTimeRaw = raw.createTime ?? raw.create_time;
  let createTime = '';
  if (createTimeRaw) {
    const parsed = new Date(
      typeof createTimeRaw === 'number' && createTimeRaw < 1e12
        ? createTimeRaw * 1000
        : createTimeRaw,
    );
    if (!Number.isNaN(parsed.getTime())) {
      createTime = parsed.toISOString();
    }
  }

  const ipLocation = raw.ipLocation ?? raw.ip_location ?? '';

  const rawSubs = raw.subComments ?? raw.sub_comments ?? [];
  const subComments = rawSubs.map(parseRawComment).filter((c): c is Comment => c !== null);

  // Use the reported reply count when present and non-zero, otherwise the
  // number of replies actually parsed.
  const rawSubCount = raw.subCommentCount ?? raw.sub_comment_count;
  const subCommentCount = rawSubCount
    ? (typeof rawSubCount === 'string' ? parseInt(rawSubCount, 10) || 0 : rawSubCount)
    : subComments.length;

  return {
    id,
    userId,
    nickname,
    avatar,
    content,
    likeCount,
    createTime,
    ipLocation,
    subCommentCount,
    subComments,
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// DOM scraping fallback — uses Playwright Node-side API exclusively
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * DOM fallback: assemble a FeedDetail by reading the rendered page through
 * Playwright's `$eval` / `$$eval`.
 *
 * Every lookup is best-effort: a selector that fails to resolve contributes
 * an empty string (or empty list) rather than an error. The result carries
 * no comments, an empty `lastUpdateTime`, and `isLiked` / `isFavorited`
 * set to `false`. `createTime` is the element's trimmed text as displayed.
 *
 * @param page - Playwright page already showing the note.
 * @param feedId - Note ID copied into the result.
 * @param xsecToken - Token copied into the result.
 * @returns The scraped detail.
 */
async function scrapeDetailFromDom(
  page: Page,
  feedId: string,
  xsecToken: string,
): Promise<FeedDetail> {
  // Trimmed text content of the first match, '' when nothing matches.
  const textOf = (selector: string): Promise<string> =>
    page.$eval(selector, (el) => el.textContent?.trim() ?? '').catch(() => '');

  // Attribute value of the first match, '' when missing.
  const attrOf = (selector: string, attribute: string): Promise<string> =>
    page
      .$eval(selector, (el, name) => el.getAttribute(name) ?? '', attribute)
      .catch(() => '');

  const title = await textOf(SEL.title);
  const description = await textOf(SEL.description);

  // Images — the image list first, the hero image as a fallback.
  let images: string[] = await page
    .$$eval(SEL.images, (nodes) =>
      nodes.map((node) => node.getAttribute('src') ?? '').filter(Boolean),
    )
    .catch(() => [] as string[]);

  if (images.length === 0) {
    const heroSrc = await attrOf(SEL.heroImage, 'src');
    if (heroSrc) images = [heroSrc];
  }

  // Video URL — the <video> element itself, then its <source> child.
  const videoSrc = await attrOf(SEL.video, 'src');
  const videoUrl: string | undefined =
    videoSrc || (await attrOf(SEL.videoSource, 'src')) || undefined;

  const type: 'normal' | 'video' = videoUrl ? 'video' : 'normal';

  // Tags, with a leading '#' stripped.
  const tags: string[] = await page
    .$$eval(SEL.tags, (nodes) =>
      nodes
        .map((node) => node.textContent?.trim().replace(/^#/, '') ?? '')
        .filter(Boolean),
    )
    .catch(() => [] as string[]);

  // Stats
  const likeCount = await extractCount(page, SEL.likeCount);
  const collectCount = await extractCount(page, SEL.collectCount);
  const commentCount = await extractCount(page, SEL.commentCount);

  const createTime = await textOf(SEL.createTime);
  const ipLocation = await textOf(SEL.ipLocation);

  // Author
  const nickname = await textOf(SEL.authorName);
  const avatar = await attrOf(SEL.authorAvatar, 'src');

  // The author ID is the hex segment of the profile link.
  const profileHref = await attrOf(SEL.authorLink, 'href');
  const authorId = profileHref.match(/\/user\/profile\/([a-f0-9]+)/)?.[1] ?? '';

  return {
    id: feedId,
    xsecToken,
    title,
    description,
    type,
    images,
    videoUrl,
    tags,
    likeCount,
    collectCount,
    commentCount,
    isLiked: false,
    isFavorited: false,
    createTime,
    lastUpdateTime: '',
    ipLocation,
    user: {
      id: authorId,
      nickname,
      avatar,
    },
    comments: [],
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Comment extraction from Vue store (async-loaded data)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Wait for the top-level comments to land in the page store, then parse them.
 *
 * Comments are loaded asynchronously after render. The store entry at
 * `__INITIAL_STATE__.note.noteDetailMap[feedId].comments` starts as
 * `{ list: [], firstRequestFinish: false }`; this polls every 200 ms for up
 * to 5 s until `firstRequestFinish` is set. On timeout whatever `list`
 * currently holds is returned (or an empty array when there is none).
 *
 * @param page - Playwright page showing the note.
 * @param feedId - Note ID used as the key into `noteDetailMap`.
 * @returns Parsed comments; records without an `id` are dropped.
 */
async function extractCommentsFromStore(
  page: Page,
  feedId: string,
): Promise<Comment[]> {
  const rawComments = (await page.evaluate(
    async (id: string) => {
      // NOTE: this callback runs in the browser. It can only see its own
      // parameter (`id`); Node-side variables such as `feedId` do not exist
      // there and referencing them raises a ReferenceError.
      const maxWaitMs = 5000;
      const pollMs = 200;
      let waited = 0;

      for (;;) {
        const state = (window as unknown as Record<string, unknown>).__INITIAL_STATE__ as
          Record<string, unknown> | undefined;
        const note = state?.note as Record<string, unknown> | undefined;
        const map = note?.noteDetailMap as Record<string, Record<string, unknown>> | undefined;
        const entry = map?.[id];
        const comments = entry?.comments as
          { list?: unknown[]; firstRequestFinish?: boolean } | undefined;

        const timedOut = waited >= maxWaitMs;

        // Finished loading, or out of time: hand back a plain JSON copy of
        // whatever the list contains.
        if (comments?.list && (comments.firstRequestFinish || timedOut)) {
          return JSON.parse(JSON.stringify(comments.list));
        }
        if (timedOut) return [];

        await new Promise((r) => setTimeout(r, pollMs));
        waited += pollMs;
      }
    },
    feedId,
  )) as RawCommentData[];

  return rawComments
    .map(parseRawComment)
    .filter((c): c is Comment => c !== null);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Sub-comment extraction from Vue store
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Block until the comment store of `feedId` reports that its first request
 * has finished, polling inside the page every 200 ms for at most 5 s.
 * Resolves silently when the time budget runs out.
 */
async function waitForCommentsStoreReady(page: Page, feedId: string): Promise<void> {
  await page.evaluate(
    async (id: string) => {
      const pollMs = 200;
      const maxWaitMs = 5000;

      for (let elapsed = 0; elapsed < maxWaitMs; elapsed += pollMs) {
        const root = (window as unknown as Record<string, unknown>).__INITIAL_STATE__ as
          Record<string, unknown> | undefined;
        const noteStore = root?.note as Record<string, unknown> | undefined;
        const detailMap = noteStore?.noteDetailMap as
          Record<string, Record<string, unknown>> | undefined;
        const commentState = detailMap?.[id]?.comments as
          { firstRequestFinish?: boolean } | undefined;

        if (commentState?.firstRequestFinish) return;

        await new Promise((resolve) => setTimeout(resolve, pollMs));
      }
    },
    feedId,
  );
}
|
||||
|
||||
/**
 * Report how many replies the page store currently holds for one parent
 * comment. Returns 0 when the store, the comment list, or the parent
 * comment cannot be found. Used to decide whether more clicks are needed.
 */
async function getStoreSubCommentCount(
  page: Page,
  feedId: string,
  commentId: string,
): Promise<number> {
  const count = await page.evaluate(
    (args: { feedId: string; commentId: string }) => {
      const root = (window as unknown as Record<string, unknown>).__INITIAL_STATE__ as
        Record<string, unknown> | undefined;
      const noteStore = root?.note as Record<string, unknown> | undefined;
      const detailMap = noteStore?.noteDetailMap as
        Record<string, Record<string, unknown>> | undefined;
      const commentState = detailMap?.[args.feedId]?.comments as
        { list?: Array<Record<string, unknown>> } | undefined;

      const list = commentState?.list;
      if (!list) return 0;

      const parent = list.find((candidate) => candidate.id === args.commentId);
      if (!parent) return 0;

      const replies = (parent.subComments ?? parent.sub_comments ?? []) as unknown[];
      return replies.length;
    },
    { feedId, commentId },
  );
  return count;
}
|
||||
|
||||
/**
 * Read the replies of one parent comment out of the page store and parse
 * them, keeping at most `maxCount`.
 *
 * Store layout used here:
 *   `__INITIAL_STATE__.note.noteDetailMap[feedId].comments.list[]`
 * where each list item exposes its replies as `subComments` (or
 * `sub_comments`). An empty array is returned when the list or the parent
 * comment is missing.
 */
async function extractSubCommentsFromStore(
  page: Page,
  feedId: string,
  commentId: string,
  maxCount: number,
): Promise<Comment[]> {
  const rawSubComments = (await page.evaluate(
    (args: { feedId: string; commentId: string; maxCount: number }) => {
      const root = (window as unknown as Record<string, unknown>).__INITIAL_STATE__ as
        Record<string, unknown> | undefined;
      const noteStore = root?.note as Record<string, unknown> | undefined;
      const detailMap = noteStore?.noteDetailMap as
        Record<string, Record<string, unknown>> | undefined;
      const commentState = detailMap?.[args.feedId]?.comments as
        { list?: Array<Record<string, unknown>> } | undefined;

      const list = commentState?.list;
      if (!list) return [];

      const parent = list.find((candidate) => candidate.id === args.commentId);
      if (!parent) return [];

      const replies = (parent.subComments ?? parent.sub_comments ?? []) as unknown[];
      // Round-trip through JSON so only plain data crosses back to Node.
      return JSON.parse(JSON.stringify(replies.slice(0, args.maxCount)));
    },
    { feedId, commentId, maxCount },
  )) as RawCommentData[];

  const parsed: Comment[] = [];
  for (const raw of rawSubComments) {
    const comment = parseRawComment(raw);
    if (comment !== null) parsed.push(comment);
  }
  return parsed;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Read the trimmed text of `selector` and convert it to a number via
 * `parseCountString`. A missing element (or any lookup failure) counts
 * as the string '0'.
 */
async function extractCount(page: Page, selector: string): Promise<number> {
  let raw: string;
  try {
    raw = await page.$eval(selector, (el) => el.textContent?.trim() ?? '0');
  } catch {
    raw = '0';
  }
  return parseCountString(raw);
}
|
||||
@@ -0,0 +1,435 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '@social/core/utils/logger.js';
|
||||
import type { Feed } from './types.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const EXPLORE_URL = 'https://www.xiaohongshu.com/explore';
|
||||
|
||||
const log = logger.child({ module: 'xhs-feeds' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// __INITIAL_STATE__ raw types (partial — only the fields we care about)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * One entry of a feed list found in `__INITIAL_STATE__`.
 *
 * Every field is optional and most exist in both camelCase and snake_case,
 * because the payload shape is not stable. Data may sit directly on the
 * item or be nested under `noteCard`.
 */
interface RawFeedItem {
  // Identity
  id?: string;
  noteId?: string;
  note_id?: string;

  // Access token
  xsecToken?: string;
  xsec_token?: string;

  // Text
  displayTitle?: string;
  display_title?: string;
  title?: string;
  desc?: string;
  description?: string;

  // Kind of note
  type?: string;
  model_type?: string;

  // Nested / related records
  noteCard?: RawNoteCard;
  cover?: RawImage;
  user?: RawUser;

  // Interaction numbers
  interactInfo?: RawInteractInfo;
  interact_info?: RawInteractInfo;
  likedCount?: string;
  liked_count?: string;
}
|
||||
|
||||
/**
 * Nested card record that may carry a feed item's data when it is not
 * present directly on the item itself. All fields are optional.
 */
interface RawNoteCard {
  // Identity and token
  noteId?: string;
  xsecToken?: string;
  xsec_token?: string;

  // Text
  displayTitle?: string;
  display_title?: string;
  title?: string;
  desc?: string;

  // Kind of note
  type?: string;

  // Related records
  cover?: RawImage;
  user?: RawUser;
  interactInfo?: RawInteractInfo;
  interact_info?: RawInteractInfo;
}
|
||||
|
||||
/**
 * Raw image record. A usable URL may appear under any of the direct URL
 * fields or inside one of the info lists.
 */
interface RawImage {
  // Direct URLs
  url?: string;
  urlPre?: string;
  url_pre?: string;
  urlDefault?: string;
  url_default?: string;

  // URL variants
  infoList?: { url?: string }[];
  info_list?: { url?: string }[];
}
|
||||
|
||||
/** Raw author record; each value can appear under several spellings. */
interface RawUser {
  // Identifier
  userId?: string;
  user_id?: string;

  // Display name
  nickname?: string;
  nickName?: string;
  nick_name?: string;

  // Avatar image URL
  avatar?: string;
  avatarUrl?: string;
  avatar_url?: string;
}
|
||||
|
||||
/** Raw interaction counters; the like count is a string under one of four keys. */
interface RawInteractInfo {
  likedCount?: string;
  likeCount?: string;
  liked_count?: string;
  like_count?: string;
}
|
||||
|
||||
/**
 * The slice of the page's `__INITIAL_STATE__` global that feed extraction
 * reads. Three known containers may hold a `feeds` array; any other
 * top-level key is allowed and typed as `unknown`.
 */
interface InitialState {
  /** Feed list under `homeFeed`. */
  homeFeed?: { feeds?: RawFeedItem[] };
  /** Feed list under `feed`. */
  feed?: { feeds?: RawFeedItem[] };
  /** Feed list under `explore`. */
  explore?: { feeds?: RawFeedItem[] };
  /** Anything else present on the state object. */
  [key: string]: unknown;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// listFeeds — extract feeds from the explore page
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Open the explore page and return its feed list.
 *
 * The feeds are taken from the `__INITIAL_STATE__` global when it yields at
 * least one entry; otherwise the rendered DOM is scraped instead.
 *
 * @param page - Playwright page to navigate.
 * @returns The extracted feeds (possibly empty).
 */
export async function listFeeds(page: Page): Promise<Feed[]> {
  log.debug('Navigating to explore page');
  await page.goto(EXPLORE_URL, { waitUntil: 'domcontentloaded' });

  // Give client-side hydration a moment to settle.
  await page.waitForTimeout(2000);

  // Strategy 1: the state object placed on the page.
  const state = await extractInitialState(page);
  const fromState = state ? parseFeedsFromState(state) : null;

  if (fromState !== null) {
    if (fromState.length > 0) {
      log.info({ count: fromState.length }, 'Extracted feeds from __INITIAL_STATE__');
      return fromState;
    }
    log.debug('__INITIAL_STATE__ found but no feeds extracted, falling back to DOM');
  }

  // Strategy 2: scrape the rendered cards.
  log.debug('Falling back to DOM scraping for feed list');
  const fromDom = await scrapeFeedsFromDom(page);
  log.info({ count: fromDom.length }, 'Extracted feeds from DOM');
  return fromDom;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// __INITIAL_STATE__ extraction
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Read a subset of the page's `__INITIAL_STATE__` global and return it as a
 * plain object, or `null` when it is absent, not an object, or cannot be
 * extracted.
 *
 * Only a fixed set of keys is copied. The browser side returns a JSON
 * string (parsed again on the Node side) so that Playwright's own result
 * serialization never has to walk the live state object. `structuredClone`
 * is tried first; if it throws, a `JSON.stringify` replacer is used that
 * drops functions, drops any object it has already visited (which also
 * breaks cycles), and drops objects nested deeper than 20 levels.
 *
 * @param page - Playwright page to evaluate in.
 * @returns The parsed state subset, or `null`.
 */
async function extractInitialState(page: Page): Promise<InitialState | null> {
  try {
    const json: string | null = await page.evaluate(() => {
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const s = (window as any).__INITIAL_STATE__;
      if (!s || typeof s !== 'object') return null;

      // Only the keys the extractors care about.
      const subset = {
        noteData: s.noteData,
        note: s.note,
        feed: s.feed,
        feeds: s.feeds,
        user: s.user,
        userProfile: s.userProfile,
        homeFeed: s.homeFeed,
        explore: s.explore,
      };

      try {
        return JSON.stringify(structuredClone(subset));
      } catch {
        const MAX_DEPTH = 20;
        const seen = new Set<unknown>();
        // Nesting level of every object already emitted. The replacer's
        // `this` is the object that holds `value`, so a value's depth is
        // its holder's depth + 1. A single running counter would count
        // visited objects instead of nesting and cut the output off after
        // the first few objects.
        const depthOf = new WeakMap<object, number>();

        return JSON.stringify(
          subset,
          function (this: unknown, _key: string, value: unknown) {
            if (typeof value === 'function') return undefined;
            if (typeof value === 'object' && value !== null) {
              const holderDepth =
                typeof this === 'object' && this !== null
                  ? depthOf.get(this) ?? -1
                  : -1;
              const depth = holderDepth + 1;
              if (seen.has(value) || depth > MAX_DEPTH) return undefined;
              seen.add(value);
              depthOf.set(value, depth);
            }
            return value;
          },
        );
      }
    });

    const state: unknown = json ? JSON.parse(json) : null;

    if (state && typeof state === 'object') {
      return state as InitialState;
    }

    log.debug('__INITIAL_STATE__ is not present or not an object');
    return null;
  } catch (err: unknown) {
    log.warn({ err }, 'Failed to extract __INITIAL_STATE__');
    return null;
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Feed parsing from __INITIAL_STATE__
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Turn the raw state into Feed objects.
 *
 * Candidate lists are examined in this order: `homeFeed.feeds`,
 * `feed.feeds`, `explore.feeds`, then the `feeds` array of any other
 * top-level object in `state`. The first candidate that produces at least
 * one valid feed wins, so an empty (or entirely unparseable) list in an
 * early location no longer hides a populated list in a later one.
 *
 * @param state - Parsed `__INITIAL_STATE__` subset.
 * @returns The parsed feeds, or an empty array when no candidate yields any.
 */
function parseFeedsFromState(state: InitialState): Feed[] {
  const candidates: RawFeedItem[][] = [];

  // Known locations first.
  for (const known of [state.homeFeed?.feeds, state.feed?.feeds, state.explore?.feeds]) {
    if (Array.isArray(known)) candidates.push(known);
  }

  // Then any other top-level object that exposes a `feeds` array.
  for (const key of Object.keys(state)) {
    if (key === 'homeFeed' || key === 'feed' || key === 'explore') continue;
    const value = state[key];
    if (value && typeof value === 'object' && !Array.isArray(value)) {
      const nested = (value as Record<string, unknown>)['feeds'];
      if (Array.isArray(nested)) candidates.push(nested as RawFeedItem[]);
    }
  }

  for (const candidate of candidates) {
    const feeds = candidate
      .map(parseRawFeedItem)
      .filter((f): f is Feed => f !== null);
    if (feeds.length > 0) return feeds;
  }

  return [];
}
|
||||
|
||||
/**
 * Map one raw feed record to a Feed.
 *
 * Values are looked up on the item itself first and under `noteCard`
 * second, across camelCase and snake_case spellings. The note is typed
 * 'video' when its raw type contains "video" (case-insensitive), otherwise
 * 'normal'.
 *
 * @param raw - Raw feed record.
 * @returns The feed, or `null` when no ID could be determined.
 */
function parseRawFeedItem(raw: RawFeedItem): Feed | null {
  const card = raw.noteCard;

  const author = raw.user ?? card?.user;
  const interact =
    raw.interactInfo ?? raw.interact_info ?? card?.interactInfo ?? card?.interact_info;
  const kind = raw.type ?? raw.model_type ?? card?.type ?? '';

  // The like count lives in the interaction record or directly on the item.
  const likes =
    interact?.likedCount ?? interact?.liked_count ??
    interact?.likeCount ?? interact?.like_count ??
    raw.likedCount ?? raw.liked_count ?? '0';

  const feed: Feed = {
    id: raw.id ?? raw.noteId ?? raw.note_id ?? card?.noteId ?? '',
    xsecToken:
      raw.xsecToken ?? raw.xsec_token ?? card?.xsecToken ?? card?.xsec_token ?? '',
    title:
      raw.displayTitle ?? raw.display_title ?? raw.title ??
      card?.displayTitle ?? card?.display_title ?? card?.title ?? '',
    description: raw.desc ?? raw.description ?? card?.desc ?? '',
    type: kind.toLowerCase().includes('video') ? 'video' : 'normal',
    coverUrl: extractImageUrl(raw.cover ?? card?.cover),
    likeCount: parseCountString(likes),
    user: {
      id: author?.userId ?? author?.user_id ?? '',
      nickname: author?.nickname ?? author?.nick_name ?? author?.nickName ?? '',
      avatar: author?.avatar ?? author?.avatarUrl ?? author?.avatar_url ?? '',
    },
  };

  // An ID is the minimum needed for a valid feed.
  return feed.id ? feed : null;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// DOM scraping fallback — uses Playwright Node-side API exclusively
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Scrape feed data using Playwright's Node-side selectors (`page.$$`,
 * `card.$eval`) to avoid needing DOM lib types in our TypeScript config.
 *
 * Waits up to 10 s for a `.note-item` card, then reads each card's link,
 * cover, title, author and like count. Cards without a parsable note ID are
 * skipped. The description is always empty because the card markup read here
 * does not contain one.
 *
 * @param page - Page that already shows a feed listing.
 * @returns The feeds that could be parsed (possibly empty).
 */
async function scrapeFeedsFromDom(page: Page): Promise<Feed[]> {
  // Best effort: if no card shows up in time we still fall through and return [].
  await page.waitForSelector('.note-item', { timeout: 10_000 }).catch(() => null);

  const cardElements = await page.$$('.note-item');
  const feeds: Feed[] = [];

  for (const card of cardElements) {
    try {
      // The cover link carries both the note ID and the xsec_token.
      const href = await card
        .$eval('a.cover', (el) => el.getAttribute('href') ?? '')
        .catch(() => '');

      const id = href.match(/\/explore\/([a-f0-9]+)/)?.[1] ?? '';
      if (!id) continue;

      // The href holds the token percent-encoded. Decode it so that callers
      // which encode it again when building a URL do not double-encode it.
      // `#` is excluded so a URL fragment never leaks into the token.
      const encodedToken = href.match(/xsec_token=([^&#]+)/)?.[1] ?? '';
      let xsecToken = encodedToken;
      try {
        xsecToken = decodeURIComponent(encodedToken);
      } catch {
        // Malformed escape sequence: keep the raw value rather than dropping the card.
      }

      // Cover image, normalised the same way as in the JSON extraction path.
      const coverSrc = await card
        .$eval('a.cover img', (el) => el.getAttribute('src') ?? '')
        .catch(() => '');
      const coverUrl = ensureHttps(coverSrc);

      const title = await card
        .$eval('.footer .title', (el) => el.textContent?.trim() ?? '')
        .catch(() => '');

      const nickname = await card
        .$eval('.footer .author-wrapper .name', (el) => el.textContent?.trim() ?? '')
        .catch(() => '');

      const avatar = await card
        .$eval('.footer .author-wrapper .author-head img', (el) => el.getAttribute('src') ?? '')
        .catch(() => '');

      // The author ID is taken from the profile link.
      const authorHref = await card
        .$eval('.footer .author-wrapper a', (el) => el.getAttribute('href') ?? '')
        .catch(() => '');
      const userId = authorHref.match(/\/user\/profile\/([a-f0-9]+)/)?.[1] ?? '';

      const likeText = await card
        .$eval('.footer .like-wrapper .count', (el) => el.textContent?.trim() ?? '0')
        .catch(() => '0');
      const likeCount = parseCountString(likeText);

      // A play icon on the card marks a video note.
      const hasVideoIcon = await card
        .$('.play-icon')
        .then((el) => el !== null)
        .catch(() => false);

      feeds.push({
        id,
        xsecToken,
        title,
        description: '',
        type: hasVideoIcon ? 'video' : 'normal',
        coverUrl,
        likeCount,
        user: { id: userId, nickname, avatar },
      });
    } catch {
      // Skip cards that fail to parse.
      continue;
    }
  }

  return feeds;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Resolve the URL of a raw cover image.
 *
 * Direct URL fields are tried in a fixed order (`url`, `urlPre`,
 * `urlDefault`, `url_pre`, `url_default`); if none is set, the first entry of
 * the variant list (`infoList` / `info_list`) is used.
 *
 * @param raw - Raw image object, or `undefined` when the item has no cover.
 * @returns The URL passed through `ensureHttps`, or `''` when nothing usable exists.
 */
function extractImageUrl(raw: RawImage | undefined): string {
  if (!raw) return '';

  const direct =
    raw.url || raw.urlPre || raw.urlDefault || raw.url_pre || raw.url_default;
  if (direct) return ensureHttps(direct);

  // Only the first variant is considered.
  const firstVariant = (raw.infoList ?? raw.info_list)?.[0];
  return firstVariant?.url ? ensureHttps(firstVariant.url) : '';
}
|
||||
|
||||
/**
 * Ensure a URL has an https:// prefix. Xiaohongshu sometimes returns
 * protocol-relative URLs (//sns-...) or bare http.
 *
 * The `http://` scheme is matched case-insensitively, so `HTTP://…` is
 * upgraded as well. Any other input (including `''`, `https://…` and
 * `data:` URLs) is returned unchanged.
 *
 * @param url - URL as found in page data.
 * @returns The URL with an `https://` scheme where one could be applied.
 */
function ensureHttps(url: string): string {
  if (url.startsWith('//')) return `https:${url}`;
  if (/^http:\/\//i.test(url)) return `https://${url.slice('http://'.length)}`;
  return url;
}
|
||||
|
||||
/**
 * Parse a count that may contain abbreviations like "1.2w" (万), "3k" (千)
 * or "1.2亿" into a number.
 *
 * Thousands separators (`,`) are removed first. Input that cannot be parsed
 * yields 0 — including a bare unit such as "w", which previously produced
 * `NaN`. Numeric input is accepted as-is (truncated to an integer) because
 * page data does not always deliver counts as strings.
 *
 * @param str - Count text (or number) as found in page data.
 * @returns The parsed count, or 0 when nothing numeric could be read.
 */
function parseCountString(str: string | number | null | undefined): number {
  if (typeof str === 'number') {
    return Number.isFinite(str) ? Math.trunc(str) : 0;
  }
  if (!str) return 0;

  const cleaned = str.replace(/,/g, '').trim().toLowerCase();

  let multiplier = 1;
  if (cleaned.includes('亿')) {
    multiplier = 100_000_000;
  } else if (cleaned.includes('w') || cleaned.includes('万')) {
    multiplier = 10_000;
  } else if (cleaned.includes('k') || cleaned.includes('千')) {
    multiplier = 1_000;
  }

  if (multiplier !== 1) {
    // Abbreviated counts may be fractional ("1.2w"), so parse as float.
    const value = parseFloat(cleaned);
    return Number.isFinite(value) ? Math.round(value * multiplier) : 0;
  }

  const n = parseInt(cleaned, 10);
  return Number.isNaN(n) ? 0 : n;
}
|
||||
|
||||
// Re-export for use by other modules (search, user-profile, feed-detail)
|
||||
// that need the same extraction / parsing helpers.
|
||||
export { extractInitialState, parseCountString, ensureHttps };
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,174 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '@social/core/utils/logger.js';
|
||||
import { XHS_SELECTORS } from './selectors.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Base URL that note IDs are appended to when opening a note. */
const FEED_DETAIL_URL = 'https://www.xiaohongshu.com/explore';

/** Delay (ms) after clicking like/favorite so the button state can update. */
const TOGGLE_SETTLE_MS = 1_000;

/** Selector group for the note-detail view. */
const selDetail = XHS_SELECTORS.feedDetail;

/** Logger scoped to this module. */
const log = logger.child({ module: 'xhs-interaction' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Build the URL that opens a note.
 *
 * Both the note ID and the token are percent-encoded, so neither value can
 * alter the path or query structure of the resulting URL.
 *
 * @param feedId - Note ID (becomes the last path segment).
 * @param xsecToken - Unencoded `xsec_token` value for the note.
 * @returns Absolute note URL with `xsec_source=pc_search`.
 */
function buildFeedUrl(feedId: string, xsecToken: string): string {
  const id = encodeURIComponent(feedId);
  const token = encodeURIComponent(xsecToken);
  return `${FEED_DETAIL_URL}/${id}?xsec_token=${token}&xsec_source=pc_search`;
}
|
||||
|
||||
/**
 * Click the LAST element matching the selector (the overlay's button).
 * XHS opens note detail as an overlay on /explore — the overlay is rendered
 * LAST in DOM, so .last() targets the correct button.
 *
 * @param page - Page showing the note overlay.
 * @param selector - Selector of the button to click.
 * @returns `true` when the click succeeded, `false` when it failed within 5 s
 *   (the reason is logged at debug level).
 */
async function clickLastMatch(page: Page, selector: string): Promise<boolean> {
  try {
    await page.locator(selector).last().click({ timeout: 5_000 });
    return true;
  } catch (err: unknown) {
    // Callers only see a boolean, so keep the actual cause in the log.
    log.debug({ err, selector }, 'Click on last matching element failed');
    return false;
  }
}
|
||||
|
||||
/**
 * Report whether a button currently shows its "active" icon.
 *
 * Looks at the last element matching `btnSelector`, finds the first `<use>`
 * inside it and compares its `xlink:href` attribute with `activeHref`
 * (XHS uses #like vs #liked, #collect vs #collected).
 *
 * @param page - Page showing the note overlay.
 * @param btnSelector - Selector of the button to inspect.
 * @param activeHref - Icon reference that denotes the active state.
 * @returns `true` when the icon matches; `false` otherwise, including when
 *   the evaluation fails.
 */
async function readState(page: Page, btnSelector: string, activeHref: string): Promise<boolean> {
  const button = page.locator(btnSelector).last();

  try {
    return await button.evaluate((el, expected) => {
      const iconRef = el.querySelector('use')?.getAttribute('xlink:href');
      return iconRef === expected;
    }, activeHref);
  } catch {
    return false;
  }
}
|
||||
|
||||
/**
 * Navigate to a note and wait (up to 10 s) for its detail container.
 *
 * @param page - Page to navigate.
 * @param feedId - Note ID.
 * @param xsecToken - `xsec_token` belonging to the note.
 * @throws When navigation fails or the container does not appear in time.
 */
async function openFeedOverlay(page: Page, feedId: string, xsecToken: string): Promise<void> {
  const target = buildFeedUrl(feedId, xsecToken);
  await page.goto(target, { waitUntil: 'domcontentloaded' });
  await page.waitForSelector(selDetail.noteContainer, { timeout: 10_000 });
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// toggleLike — pure toggle, clicks the like button once
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Open a note and click its like button exactly once.
 *
 * @param page - Page used for the interaction.
 * @param feedId - Note ID.
 * @param xsecToken - `xsec_token` belonging to the note.
 * @returns `success: false` when the button could not be clicked; otherwise
 *   `success: true` together with the like state read after the click.
 */
export async function toggleLike(
  page: Page,
  feedId: string,
  xsecToken: string,
): Promise<{ success: boolean; liked: boolean }> {
  const likeButton = '.engage-bar-style .like-wrapper';

  log.info({ feedId }, 'Toggling like on note');
  await openFeedOverlay(page, feedId, xsecToken);

  if (!(await clickLastMatch(page, likeButton))) {
    log.warn('Like button not found in note detail overlay');
    return { success: false, liked: false };
  }

  // Give the UI time to swap the icon before reading it back.
  await page.waitForTimeout(TOGGLE_SETTLE_MS);

  const liked = await readState(page, likeButton, '#liked');
  log.info({ feedId, liked }, 'Like toggle complete');
  return { success: true, liked };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// toggleFavorite — pure toggle, clicks the favorite button once
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Open a note and click its favorite (collect) button exactly once.
 *
 * @param page - Page used for the interaction.
 * @param feedId - Note ID.
 * @param xsecToken - `xsec_token` belonging to the note.
 * @returns `success: false` when the button could not be clicked; otherwise
 *   `success: true` together with the favorite state read after the click.
 */
export async function toggleFavorite(
  page: Page,
  feedId: string,
  xsecToken: string,
): Promise<{ success: boolean; favorited: boolean }> {
  const collectButton = '.engage-bar-style .collect-wrapper';

  log.info({ feedId }, 'Toggling favorite on note');
  await openFeedOverlay(page, feedId, xsecToken);

  if (!(await clickLastMatch(page, collectButton))) {
    log.warn('Favorite button not found in note detail overlay');
    return { success: false, favorited: false };
  }

  // Give the UI time to swap the icon before reading it back.
  await page.waitForTimeout(TOGGLE_SETTLE_MS);

  const favorited = await readState(page, collectButton, '#collected');
  log.info({ feedId, favorited }, 'Favorite toggle complete');
  return { success: true, favorited };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// setLikeState / setFavoriteState — idempotent state-setting operations
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Bring a note's like state to `targetLiked`, clicking only when needed.
 *
 * @param page - Page used for the interaction.
 * @param feedId - Note ID.
 * @param xsecToken - `xsec_token` belonging to the note.
 * @param targetLiked - Desired like state.
 * @returns `liked` is the state read last, `changed` tells whether it differs
 *   from the state read before clicking, and `success` tells whether the
 *   final state equals `targetLiked`.
 */
export async function setLikeState(
  page: Page,
  feedId: string,
  xsecToken: string,
  targetLiked: boolean,
): Promise<{ success: boolean; liked: boolean; changed: boolean }> {
  const likeButton = '.engage-bar-style .like-wrapper';

  log.info({ feedId, targetLiked }, 'Setting like state on note');
  await openFeedOverlay(page, feedId, xsecToken);

  const before = await readState(page, likeButton, '#liked');
  if (before === targetLiked) {
    // Already in the requested state — nothing to click.
    return { success: true, liked: before, changed: false };
  }

  if (!(await clickLastMatch(page, likeButton))) {
    log.warn('Like button not found in note detail overlay');
    return { success: false, liked: before, changed: false };
  }

  await page.waitForTimeout(TOGGLE_SETTLE_MS);
  const after = await readState(page, likeButton, '#liked');

  return {
    success: after === targetLiked,
    liked: after,
    changed: after !== before,
  };
}
|
||||
|
||||
/**
 * Bring a note's favorite state to `targetFavorited`, clicking only when needed.
 *
 * @param page - Page used for the interaction.
 * @param feedId - Note ID.
 * @param xsecToken - `xsec_token` belonging to the note.
 * @param targetFavorited - Desired favorite state.
 * @returns `favorited` is the state read last, `changed` tells whether it
 *   differs from the state read before clicking, and `success` tells whether
 *   the final state equals `targetFavorited`.
 */
export async function setFavoriteState(
  page: Page,
  feedId: string,
  xsecToken: string,
  targetFavorited: boolean,
): Promise<{ success: boolean; favorited: boolean; changed: boolean }> {
  const collectButton = '.engage-bar-style .collect-wrapper';

  log.info({ feedId, targetFavorited }, 'Setting favorite state on note');
  await openFeedOverlay(page, feedId, xsecToken);

  const before = await readState(page, collectButton, '#collected');
  if (before === targetFavorited) {
    // Already in the requested state — nothing to click.
    return { success: true, favorited: before, changed: false };
  }

  if (!(await clickLastMatch(page, collectButton))) {
    log.warn('Favorite button not found in note detail overlay');
    return { success: false, favorited: before, changed: false };
  }

  await page.waitForTimeout(TOGGLE_SETTLE_MS);
  const after = await readState(page, collectButton, '#collected');

  return {
    success: after === targetFavorited,
    favorited: after,
    changed: after !== before,
  };
}
|
||||
@@ -0,0 +1,252 @@
|
||||
import { chromium } from 'rebrowser-playwright';
|
||||
import type { Page, BrowserContext } from 'rebrowser-playwright';
|
||||
|
||||
import type { BrowserManager } from '@social/core/browser/manager.js';
|
||||
import { config } from '@social/core/config/index.js';
|
||||
import { logger } from '@social/core/utils/logger.js';
|
||||
import { cookieStore } from '@social/core/cookie/store.js';
|
||||
import { XHS_SELECTORS } from './selectors.js';
|
||||
import type { LoginStatus, QRCodeResult } from './types.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Platform key passed to the cookie store and the browser manager. */
const PLATFORM = 'xiaohongshu';

/** Page used both for the login check and for showing the QR code. */
const EXPLORE_URL = 'https://www.xiaohongshu.com/explore';

/** How long to wait for the user to scan the QR code (4 minutes). */
const QR_SCAN_TIMEOUT_MS = 4 * 60 * 1000;

/** Safety timeout for the login browser window (5 minutes). */
const LOGIN_BROWSER_SAFETY_MS = 5 * 60 * 1000;

/** Logger scoped to this module. */
const log = logger.child({ module: 'xhs-login' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// checkLoginStatus
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Load the explore page and report whether a user is logged in.
 *
 * The logged-in indicator element is awaited for up to 5 s; if it never
 * appears the user counts as logged out. When it does appear, username,
 * avatar URL and user ID are read on a best-effort basis and only included
 * in the result when non-empty.
 *
 * @param page - A Playwright Page already managed by the caller.
 * @returns The login status, optionally with `username`, `avatar` and `userId`.
 */
export async function checkLoginStatus(page: Page): Promise<LoginStatus> {
  const loginSel = XHS_SELECTORS.login;

  // Reads one attribute of the first match; any failure yields ''.
  const readAttr = (selector: string, attr: string): Promise<string> =>
    page.$eval(selector, (el, name) => el.getAttribute(name) ?? '', attr).catch(() => '');

  await page.goto(EXPLORE_URL, { waitUntil: 'domcontentloaded' });

  // The indicator is rendered client-side, so wait for it instead of probing once.
  const indicator = await page
    .waitForSelector(loginSel.loggedInIndicator, { timeout: 5_000 })
    .catch(() => null);
  if (!indicator) {
    return { loggedIn: false };
  }

  const username = await indicator.textContent().catch(() => null);
  const avatar = await readAttr(loginSel.userAvatar, 'src');

  // The user ID is the hex segment of the profile link.
  const profileHref = await readAttr(loginSel.userLink, 'href');
  const userId = profileHref.match(/\/user\/profile\/([a-f0-9]+)/)?.[1] ?? '';

  return {
    loggedIn: true,
    ...(username ? { username: username.trim() } : {}),
    ...(avatar ? { avatar } : {}),
    ...(userId ? { userId } : {}),
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// getLoginQRCode
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Open the explore page in a **non-headless** browser, wait for the login
 * modal QR code, and return the image data.
 *
 * Why non-headless? XHS detects unauthenticated headless requests and
 * redirects them to a security-restriction error page (code 300012:
 * "IP存在风险"). Once the user scans the QR code and cookies are saved,
 * all subsequent operations can use the normal headless BrowserManager.
 *
 * A fire-and-forget background task waits for the scan to complete, saves
 * cookies to disk, clears the main BrowserManager context (so it reloads
 * the fresh cookies on next use), then closes the login browser.
 *
 * The login browser is closed on every failure path, including a failure to
 * create the context or page, and by a safety timer after 5 minutes.
 *
 * @param browser - The shared BrowserManager instance (used to clear its
 *   cached context after login so it picks up new cookies).
 * @returns QR code data or an indication that the user is already logged in.
 * @throws When navigation fails, the QR code never appears, or its `src` is empty.
 */
export async function getLoginQRCode(
  browser: BrowserManager,
): Promise<QRCodeResult> {
  log.info('Launching non-headless browser for QR code login');

  const loginBrowser = await chromium.launch({
    headless: false,
    ...(config.browserBin ? { executablePath: config.browserBin } : {}),
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-gpu',
    ],
  });

  let released = false;

  // Idempotent teardown shared by the safety timer, the error path and the
  // background wait task.
  const release = async (): Promise<void> => {
    if (released) return;
    released = true;
    clearTimeout(safetyTimer);
    await loginBrowser.close().catch((err: unknown) => {
      log.warn({ err }, 'Failed to close login browser');
    });
  };

  const safetyTimer = setTimeout(() => {
    if (!released) {
      log.warn('Login browser safety timeout: closing after 5 minutes');
      void release();
    }
  }, LOGIN_BROWSER_SAFETY_MS);

  // Do not let the pending timer keep the process alive.
  if (typeof safetyTimer === 'object' && 'unref' in safetyTimer) {
    safetyTimer.unref();
  }

  try {
    // Created inside the try so a failure here still closes the browser.
    const ctx = await loginBrowser.newContext();
    const page = await ctx.newPage();

    await page.goto(EXPLORE_URL, { waitUntil: 'domcontentloaded' });
    log.debug({ url: page.url() }, 'Login browser navigated');

    // Check whether the page already shows a logged-in user.
    // NOTE(review): the context above is created without a storageState, so
    // saved cookies are presumably not loaded here — confirm whether this
    // branch can actually be reached.
    const alreadyLoggedIn = await page
      .waitForSelector(XHS_SELECTORS.login.loggedInIndicator, { timeout: 3_000 })
      .then(() => true)
      .catch(() => false);

    if (alreadyLoggedIn) {
      await release();
      return { qrcodeData: '', alreadyLoggedIn: true, timeout: '0' };
    }

    // The login modal auto-appears after a few seconds (no button click needed).
    // Wait up to 20s; if still absent try the login button as a fallback.
    const qrVisible = await page
      .waitForSelector(XHS_SELECTORS.login.qrCodeImage, { timeout: 20_000 })
      .then(() => true)
      .catch(() => false);

    if (!qrVisible) {
      const loginBtn = await page
        .waitForSelector(XHS_SELECTORS.login.loginButton, { timeout: 5_000 })
        .catch(() => null);

      if (loginBtn) {
        await loginBtn.click();
      }
    }

    // Wait for the QR code image to appear.
    const qrElement = await page.waitForSelector(
      XHS_SELECTORS.login.qrCodeImage,
      { timeout: 10_000 },
    );

    const qrcodeData = await qrElement.getAttribute('src');
    if (!qrcodeData) {
      // The catch below releases the browser.
      throw new Error('QR code image src attribute is empty');
    }

    // Fire-and-forget: wait for the user to scan the QR code in the background.
    // Ownership of the browser passes to that task, which calls release().
    waitForLoginAndRelease(page, ctx, browser, release).catch((err: unknown) => {
      log.error({ err }, 'Login wait flow encountered an unexpected error');
    });

    return {
      qrcodeData,
      alreadyLoggedIn: false,
      timeout: '4m',
    };
  } catch (err) {
    await release();
    throw err;
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// deleteCookies
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Log the platform out locally.
 *
 * Removes the persisted cookies first and then clears the browser manager's
 * cached context for the platform, so the next operation starts with a clean
 * session.
 *
 * @param browser - The shared BrowserManager whose cached context is cleared.
 */
export async function deleteCookies(browser: BrowserManager): Promise<void> {
  // Sequential on purpose: storage is removed before the context is cleared.
  await cookieStore.delete(PLATFORM);
  await browser.clearContext(PLATFORM);

  log.info('Xiaohongshu cookies deleted and browser context cleared');
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal: waitForLoginAndRelease
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Background task that waits for the logged-in indicator to appear (meaning
 * the user has scanned the QR code).
 *
 * On success:
 *   1. Extracts storageState from the login browser context.
 *   2. Saves cookies to disk via CookieStore.
 *   3. Clears the main BrowserManager's cached context so the next
 *      headless operation creates a fresh context that reloads the cookies.
 *
 * A scan that never happens (timeout or closed browser) is logged at debug
 * level. A failure while persisting cookies after a successful scan is
 * logged as an error, so the two outcomes can be told apart. Neither case
 * rejects the returned promise.
 *
 * The login browser is closed in all cases (success, timeout, error).
 *
 * @param page - Page of the login browser showing the QR code.
 * @param ctx - Context of the login browser, source of the storage state.
 * @param browser - Shared BrowserManager whose cached context is cleared.
 * @param release - Idempotent teardown of the login browser.
 */
async function waitForLoginAndRelease(
  page: Page,
  ctx: BrowserContext,
  browser: BrowserManager,
  release: () => Promise<void>,
): Promise<void> {
  try {
    const scanned = await page
      .waitForSelector(XHS_SELECTORS.login.loggedInIndicator, {
        timeout: QR_SCAN_TIMEOUT_MS,
      })
      .then(() => true)
      .catch(() => false);

    if (!scanned) {
      // Timeout or browser closed — not an error.
      log.debug('Login wait ended without successful scan');
      return;
    }

    log.info('QR code scanned — login detected, saving cookies');

    try {
      const state = await ctx.storageState();
      await cookieStore.save(PLATFORM, state);

      // Clear the headless BrowserManager's cached context so the next
      // withPage() call creates a new one that restores the saved cookies.
      await browser.clearContext(PLATFORM);

      log.info('Cookies saved and headless context refreshed');
    } catch (err: unknown) {
      // The user did log in; what failed is persisting the session.
      log.error({ err }, 'Login detected but saving cookies failed');
    }
  } finally {
    await release();
  }
}
|
||||
@@ -0,0 +1,123 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '@social/core/utils/logger.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Main-site page visited first, before the creator center is opened. */
const MAIN_EXPLORE_URL = 'https://www.xiaohongshu.com/explore';

/** Creator-center page that lists the published notes. */
const CREATOR_WORKS_URL = 'https://creator.xiaohongshu.com/publish/works';

/** Logger scoped to this module. */
const log = logger.child({ module: 'xhs-my-notes' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** One published note as scraped from the creator-center works page. */
export interface MyNote {
  /** Hex ID taken from the note's `/explore/…` or `/note/…` link. */
  noteId: string;
  /** Trimmed text of the note's title element; `''` when none was found. */
  title: string;
  /** `src` of the first image inside the note element, if any. */
  coverUrl?: string;
  /** Main-site URL of the note, built from `noteId`. */
  noteUrl: string;
  /** `'video'` when a video indicator was found, otherwise `'image'` or `'unknown'`. */
  type: 'image' | 'video' | 'unknown';
  /** Trimmed text of the note's time/date element, as displayed. */
  publishTime?: string;
  /** First numeric stat found on the note element — presumably likes; the order is positional. */
  likeCount?: number;
  /** Second numeric stat — presumably comments. */
  commentCount?: number;
  /** Third numeric stat — presumably collects. */
  collectCount?: number;
  /** Fourth numeric stat — presumably views. */
  viewCount?: number;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// listMyNotes
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Navigate to the creator center works page and extract the list of published notes.
 *
 * The main site is visited first to establish the session, then the works
 * page is opened. Note elements are matched with several selector patterns,
 * and elements without a recognisable note link are skipped.
 *
 * Counts abbreviated with 万 / w are expanded (e.g. "1.2万" → 12000).
 *
 * @param page - Page with a logged-in session.
 * @returns The notes found on the page (possibly empty).
 * @throws When the creator center redirects to its login page.
 */
export async function listMyNotes(page: Page): Promise<MyNote[]> {
  log.info('Navigating to creator works page');

  // Establish session on main site first (same pattern as publish).
  await page.goto(MAIN_EXPLORE_URL, { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(1_000);

  await page.goto(CREATOR_WORKS_URL, { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(2_000);

  if (page.url().includes('/login')) {
    throw new Error('Creator center redirected to login — cookies may be expired');
  }

  // Wait for note items to appear; extraction is attempted even if none show up.
  await page
    .waitForSelector('.works-item, .note-item, [class*="works"], [class*="note-card"]', {
      timeout: 15_000,
    })
    .catch(() => {
      log.debug('Note list selector not found, attempting extraction anyway');
    });

  await page.waitForTimeout(1_000);

  // The callback runs inside the page, so it must not reference anything
  // from this module — all helpers are defined inline.
  const notes = await page.evaluate((): MyNote[] => {
    // Parses "1,234", "1.2万" or "3w"; returns undefined for non-numeric text.
    const parseCount = (text: string): number | undefined => {
      const cleaned = text.replace(/[,，]/g, '').trim();
      const value = parseFloat(cleaned);
      if (Number.isNaN(value)) return undefined;
      return /[万w]/i.test(cleaned) ? Math.round(value * 10000) : value;
    };

    const results: MyNote[] = [];

    // Try multiple selector patterns for different creator center versions.
    const candidates = Array.from(
      document.querySelectorAll('.works-item, .note-item, [class*="works-item"], [class*="note-card"]'),
    );

    for (const el of candidates) {
      // Extract note ID from link href.
      const link = el.querySelector('a[href*="/explore/"], a[href*="/note/"]') as HTMLAnchorElement | null;
      const href = link?.href ?? '';

      const idMatch = href.match(/\/explore\/([a-f0-9]+)|\/note\/([a-f0-9]+)/);
      const noteId = idMatch?.[1] ?? idMatch?.[2] ?? '';
      if (!noteId) continue;

      const noteUrl = `https://www.xiaohongshu.com/explore/${noteId}`;

      // Title.
      const titleEl = el.querySelector('.title, [class*="title"], h3, h4');
      const title = titleEl?.textContent?.trim() ?? '';

      // Cover image.
      const imgEl = el.querySelector('img') as HTMLImageElement | null;
      const coverUrl = imgEl?.src;

      // Note type — look for video indicator.
      const hasVideo = el.querySelector('[class*="video"], video, [class*="play"]') !== null;
      const type: 'image' | 'video' | 'unknown' = hasVideo
        ? 'video'
        : (title || coverUrl ? 'image' : 'unknown');

      // Stats — numeric values of the stat elements, kept in DOM order.
      const nums = Array.from(
        el.querySelectorAll('[class*="count"], [class*="stat"], [class*="data"]'),
      ).map((s) => parseCount(s.textContent ?? ''));

      // Publish time.
      const timeEl = el.querySelector('time, [class*="time"], [class*="date"]');
      const publishTime = timeEl?.textContent?.trim();

      results.push({
        noteId,
        title,
        coverUrl,
        noteUrl,
        type,
        publishTime,
        likeCount: nums[0],
        commentCount: nums[1],
        collectCount: nums[2],
        viewCount: nums[3],
      });
    }

    return results;
  });

  log.info({ count: notes.length }, 'My notes extracted');
  return notes;
}
|
||||
@@ -0,0 +1,446 @@
|
||||
import crypto from 'node:crypto';
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
|
||||
import { config } from '@social/core/config/index.js';
|
||||
import { logger } from '@social/core/utils/logger.js';
|
||||
import { DatabaseSync } from '@social/core/utils/sqlite.js';
|
||||
import type { CommentNotification } from './types.js';
|
||||
|
||||
/**
 * Processing state of a stored notification. Newly inserted notifications
 * start as `'new'`.
 */
export type NotificationTaskStatus = 'new' | 'pending' | 'replied' | 'failed' | 'ignored';
|
||||
|
||||
/** Shape of one row of the `notification_tasks` table (column names verbatim). */
interface NotificationRow {
  /** Primary key: SHA-256 hex digest identifying the notification. */
  fingerprint: string;
  user_id: string;
  nickname: string;
  avatar: string;
  content: string;
  type: string;
  time: string;
  feed_id: string;
  xsec_token: string;
  note_image: string;
  status: NotificationTaskStatus;
  /** `Date.now()` value at the time the row was inserted. */
  first_seen_at: number;
  /** `Date.now()` value of the most recent upsert that touched the row. */
  last_seen_at: number;
  /** Defaults to 0 in the table definition. */
  retry_count: number;
  last_attempt_at: number | null;
  replied_at: number | null;
  reply_content: string | null;
  error_message: string | null;
}
|
||||
|
||||
/**
 * A stored notification together with its processing state, in the
 * camelCase form exposed to callers.
 */
export interface NotificationTask {
  /** Identifier of the stored notification. */
  fingerprint: string;
  /** The notification payload itself. */
  notification: CommentNotification;
  status: NotificationTaskStatus;
  // NOTE(review): the timestamp fields are strings here while the table
  // stores integers — the exact string format is not visible in this chunk.
  firstSeenAt: string;
  lastSeenAt: string;
  retryCount: number;
  lastAttemptAt?: string;
  repliedAt?: string;
  replyContent?: string;
  errorMessage?: string;
}
|
||||
|
||||
/** Summary returned by an upsert of scraped notifications. */
export interface NotificationUpsertResult {
  /** Number of notifications passed in. */
  fetched: number;
  /** Notifications that were not stored yet and were inserted. */
  inserted: number;
  /** Notifications that already existed and were refreshed. */
  updated: number;
}
|
||||
|
||||
/**
 * Position marker made of a first-seen timestamp and a fingerprint.
 * Presumably used for keyset pagination over stored notifications — the
 * consuming query is not visible here.
 */
export interface NotificationKeysetCursor {
  /** Numeric first-seen timestamp of the row the cursor points at. */
  firstSeenAt: number;
  /** Fingerprint of that row. */
  fingerprint: string;
}
|
||||
|
||||
/** Name of the per-platform subdirectory that holds the database. */
const PLATFORM = 'xiaohongshu';

/** Default file name of the SQLite database. */
const DB_FILENAME = 'automation.db';

/** Logger scoped to this module. */
const log = logger.child({ module: 'xhs-notification-state' });
|
||||
|
||||
export class NotificationStateStore {
|
||||
private readonly db: InstanceType<typeof DatabaseSync>;
|
||||
private readonly dbPath: string;
|
||||
|
||||
constructor(baseDir = config.cookieDir, dbFilename = DB_FILENAME) {
|
||||
const dir = path.join(baseDir, PLATFORM);
|
||||
fs.mkdirSync(dir, { recursive: true, mode: 0o700 });
|
||||
this.dbPath = path.join(dir, dbFilename);
|
||||
|
||||
this.db = new DatabaseSync(this.dbPath);
|
||||
this.db.exec('PRAGMA journal_mode = WAL;');
|
||||
this.db.exec('PRAGMA synchronous = NORMAL;');
|
||||
|
||||
this.db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS notification_tasks (
|
||||
fingerprint TEXT PRIMARY KEY,
|
||||
user_id TEXT NOT NULL,
|
||||
nickname TEXT NOT NULL,
|
||||
avatar TEXT NOT NULL,
|
||||
content TEXT NOT NULL,
|
||||
type TEXT NOT NULL,
|
||||
time TEXT NOT NULL,
|
||||
feed_id TEXT NOT NULL,
|
||||
xsec_token TEXT NOT NULL,
|
||||
note_image TEXT NOT NULL,
|
||||
status TEXT NOT NULL,
|
||||
first_seen_at INTEGER NOT NULL,
|
||||
last_seen_at INTEGER NOT NULL,
|
||||
retry_count INTEGER NOT NULL DEFAULT 0,
|
||||
last_attempt_at INTEGER,
|
||||
replied_at INTEGER,
|
||||
reply_content TEXT,
|
||||
error_message TEXT
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_notification_tasks_status_first_seen
|
||||
ON notification_tasks(status, first_seen_at);
|
||||
CREATE INDEX IF NOT EXISTS idx_notification_tasks_user_content_status
|
||||
ON notification_tasks(user_id, content, status);
|
||||
`);
|
||||
|
||||
log.info({ dbPath: this.dbPath }, 'Notification state store initialized');
|
||||
}
|
||||
|
||||
buildFingerprint(notification: CommentNotification): string {
|
||||
const payload = [
|
||||
notification.feedId,
|
||||
notification.userId,
|
||||
notification.content.trim(),
|
||||
notification.time.trim(),
|
||||
notification.type.trim(),
|
||||
].join('|');
|
||||
|
||||
return crypto.createHash('sha256').update(payload).digest('hex');
|
||||
}
|
||||
|
||||
/**
 * Persist a batch of scraped notifications inside a single transaction.
 *
 * Each notification is keyed by its fingerprint. Unknown fingerprints are
 * inserted with status `new`; known ones get their display fields and
 * `last_seen_at` refreshed, while status, content and retry bookkeeping are
 * left untouched.
 *
 * @param notifications - Notifications scraped in the current poll.
 * @returns Number of notifications received, rows inserted and rows updated.
 * @throws Re-throws the original database error after attempting a rollback.
 */
upsertNotifications(notifications: CommentNotification[]): NotificationUpsertResult {
  if (notifications.length === 0) {
    return { fetched: 0, inserted: 0, updated: 0 };
  }

  // One timestamp for the whole batch so every row of a poll agrees.
  const now = Date.now();
  let inserted = 0;
  let updated = 0;

  const selectStmt = this.db.prepare(
    'SELECT fingerprint FROM notification_tasks WHERE fingerprint = ?',
  );
  const insertStmt = this.db.prepare(`
    INSERT INTO notification_tasks (
      fingerprint, user_id, nickname, avatar, content, type, time,
      feed_id, xsec_token, note_image, status, first_seen_at, last_seen_at
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'new', ?, ?)
  `);
  const updateStmt = this.db.prepare(`
    UPDATE notification_tasks
    SET
      nickname = ?,
      avatar = ?,
      type = ?,
      time = ?,
      feed_id = ?,
      xsec_token = ?,
      note_image = ?,
      last_seen_at = ?
    WHERE fingerprint = ?
  `);

  this.db.exec('BEGIN');
  try {
    for (const n of notifications) {
      const fp = this.buildFingerprint(n);
      const exists = selectStmt.get(fp) as { fingerprint: string } | undefined;

      if (!exists) {
        insertStmt.run(
          fp,
          n.userId,
          n.nickname,
          n.avatar,
          n.content,
          n.type,
          n.time,
          n.feedId,
          n.xsecToken,
          n.noteImage,
          now,
          now,
        );
        inserted++;
      } else {
        updateStmt.run(
          n.nickname,
          n.avatar,
          n.type,
          n.time,
          n.feedId,
          n.xsecToken,
          n.noteImage,
          now,
          fp,
        );
        updated++;
      }
    }

    this.db.exec('COMMIT');
  } catch (err) {
    // A failing ROLLBACK must not replace the error that caused it; the
    // original failure is what the caller needs to see.
    try {
      this.db.exec('ROLLBACK');
    } catch (rollbackErr) {
      const message = rollbackErr instanceof Error ? rollbackErr.message : String(rollbackErr);
      log.warn({ message }, 'Failed to roll back notification upsert');
    }
    throw err;
  }

  return {
    fetched: notifications.length,
    inserted,
    updated,
  };
}
|
||||
|
||||
/**
 * List tasks in any of the given statuses, oldest first, using LIMIT/OFFSET
 * paging.
 *
 * Rows are ordered by `first_seen_at` and then by `fingerprint`. The second
 * key matters because every row inserted by one upsert batch shares the same
 * `first_seen_at`; without a tie-breaker the order of such rows is
 * unspecified and consecutive pages could repeat or skip tasks.
 *
 * @param statuses - Statuses to include; an empty list yields no rows.
 * @param maxCount - Value bound to the SQL `LIMIT`.
 * @param offset - Value bound to the SQL `OFFSET` (defaults to 0).
 * @returns Matching tasks converted from their database rows.
 */
listByStatuses(
  statuses: NotificationTaskStatus[],
  maxCount: number,
  offset = 0,
): NotificationTask[] {
  if (statuses.length === 0) return [];

  // One placeholder per status keeps the IN list fully parameterized.
  const placeholders = statuses.map(() => '?').join(', ');
  const query = `
    SELECT
      fingerprint, user_id, nickname, avatar, content, type, time,
      feed_id, xsec_token, note_image, status, first_seen_at, last_seen_at,
      retry_count, last_attempt_at, replied_at, reply_content, error_message
    FROM notification_tasks
    WHERE status IN (${placeholders})
    ORDER BY first_seen_at ASC, fingerprint ASC
    LIMIT ?
    OFFSET ?
  `;

  const stmt = this.db.prepare(query);
  const rows = stmt.all(...statuses, maxCount, offset) as unknown as NotificationRow[];
  return rows.map((r) => this.rowToTask(r));
}
|
||||
|
||||
/**
 * List tasks in any of the given statuses using keyset pagination.
 *
 * Rows are ordered by `(first_seen_at, fingerprint)`. When a cursor is given,
 * only rows strictly after that pair are returned. One extra row is fetched
 * to learn whether another page exists; it is not part of the result.
 *
 * @param statuses - Statuses to include.
 * @param maxCount - Page size; values <= 0 yield an empty page.
 * @param cursor - Position of the last row of the previous page, if any.
 * @returns The page, a `hasMore` flag, and a `nextCursor` when more rows exist.
 */
listByStatusesKeyset(
  statuses: NotificationTaskStatus[],
  maxCount: number,
  cursor?: NotificationKeysetCursor,
): { tasks: NotificationTask[]; hasMore: boolean; nextCursor?: NotificationKeysetCursor } {
  if (statuses.length === 0 || maxCount <= 0) {
    return { tasks: [], hasMore: false };
  }

  const statusSlots = statuses.map(() => '?').join(', ');
  const afterCursor = cursor
    ? `
      AND (
        first_seen_at > ?
        OR (first_seen_at = ? AND fingerprint > ?)
      )
    `
    : '';

  const sql = `
    SELECT
      fingerprint, user_id, nickname, avatar, content, type, time,
      feed_id, xsec_token, note_image, status, first_seen_at, last_seen_at,
      retry_count, last_attempt_at, replied_at, reply_content, error_message
    FROM notification_tasks
    WHERE status IN (${statusSlots})
    ${afterCursor}
    ORDER BY first_seen_at ASC, fingerprint ASC
    LIMIT ?
  `;

  // Bind order mirrors placeholder order: statuses, cursor triple, limit.
  const bindings = [
    ...statuses,
    ...(cursor ? [cursor.firstSeenAt, cursor.firstSeenAt, cursor.fingerprint] : []),
    maxCount + 1,
  ];
  const fetched = this.db.prepare(sql).all(...bindings) as unknown as NotificationRow[];

  const hasMore = fetched.length > maxCount;
  const pageRows = hasMore ? fetched.slice(0, maxCount) : fetched;
  const tasks = pageRows.map((row) => this.rowToTask(row));

  const tail = hasMore ? pageRows[pageRows.length - 1] : undefined;
  if (!tail) {
    return { tasks, hasMore };
  }

  return {
    tasks,
    hasMore,
    nextCursor: {
      firstSeenAt: tail.first_seen_at,
      fingerprint: tail.fingerprint,
    },
  };
}
|
||||
|
||||
/**
 * Count the tasks whose status is one of `statuses`.
 *
 * @param statuses - Statuses to count; an empty list returns 0 without querying.
 * @returns Number of matching rows, or 0 when the query yields no count.
 */
countByStatuses(statuses: NotificationTaskStatus[]): number {
  if (statuses.length === 0) {
    return 0;
  }

  const statusSlots = statuses.map(() => '?').join(', ');
  const result = this.db
    .prepare(`
      SELECT COUNT(1) AS count
      FROM notification_tasks
      WHERE status IN (${statusSlots})
    `)
    .get(...statuses) as { count?: number } | undefined;

  return result?.count ?? 0;
}
|
||||
|
||||
/**
 * Load a single task by its fingerprint.
 *
 * @param fingerprint - Primary key of the task.
 * @returns The task, or `null` when no row has that fingerprint.
 */
getByFingerprint(fingerprint: string): NotificationTask | null {
  const found = this.db
    .prepare(`
      SELECT
        fingerprint, user_id, nickname, avatar, content, type, time,
        feed_id, xsec_token, note_image, status, first_seen_at, last_seen_at,
        retry_count, last_attempt_at, replied_at, reply_content, error_message
      FROM notification_tasks
      WHERE fingerprint = ?
      LIMIT 1
    `)
    .get(fingerprint) as NotificationRow | undefined;

  if (!found) {
    return null;
  }
  return this.rowToTask(found);
}
|
||||
|
||||
/**
 * Find the oldest task from `userId` with exactly this `content` that is
 * still in status `new`, `failed` or `pending`.
 *
 * @param userId - Author of the comment.
 * @param content - Comment text to match exactly.
 * @returns The task's fingerprint, or `null` when none matches.
 */
findOpenFingerprint(userId: string, content: string): string | null {
  const match = this.db
    .prepare(`
      SELECT fingerprint
      FROM notification_tasks
      WHERE user_id = ?
        AND content = ?
        AND status IN ('new', 'failed', 'pending')
      ORDER BY first_seen_at ASC
      LIMIT 1
    `)
    .get(userId, content) as { fingerprint: string } | undefined;

  return match?.fingerprint ?? null;
}
|
||||
|
||||
/**
 * Move a task to `pending`, stamp `last_attempt_at` with the current time and
 * clear any stored error message.
 *
 * @param fingerprint - Task to update.
 */
markPending(fingerprint: string): void {
  const attemptedAt = Date.now();
  this.db
    .prepare(`
      UPDATE notification_tasks
      SET status = 'pending', last_attempt_at = ?, error_message = NULL
      WHERE fingerprint = ?
    `)
    .run(attemptedAt, fingerprint);
}
|
||||
|
||||
/**
 * Record a successful reply: status becomes `replied`, `replied_at` and
 * `last_attempt_at` are both set to the current time, the reply text is
 * stored and any error message is cleared.
 *
 * @param fingerprint - Task that was answered.
 * @param replyContent - Text that was sent as the reply.
 */
markReplied(fingerprint: string, replyContent: string): void {
  const repliedAt = Date.now();
  this.db
    .prepare(`
      UPDATE notification_tasks
      SET
        status = 'replied',
        replied_at = ?,
        last_attempt_at = ?,
        reply_content = ?,
        error_message = NULL
      WHERE fingerprint = ?
    `)
    .run(repliedAt, repliedAt, replyContent, fingerprint);
}
|
||||
|
||||
/**
 * Record a failed attempt: status becomes `failed`, `retry_count` is
 * incremented, `last_attempt_at` is set to the current time and the error
 * message is stored.
 *
 * @param fingerprint - Task whose attempt failed.
 * @param errorMessage - Description of the failure.
 */
markFailed(fingerprint: string, errorMessage: string): void {
  const failedAt = Date.now();
  this.db
    .prepare(`
      UPDATE notification_tasks
      SET
        status = 'failed',
        retry_count = retry_count + 1,
        last_attempt_at = ?,
        error_message = ?
      WHERE fingerprint = ?
    `)
    .run(failedAt, errorMessage, fingerprint);
}
|
||||
|
||||
/**
 * Move a task to `ignored` and store the reason in `error_message`.
 *
 * @param fingerprint - Task to ignore.
 * @param reason - Optional explanation; defaults to "Ignored by operator".
 */
markIgnored(fingerprint: string, reason?: string): void {
  const storedReason = reason ?? 'Ignored by operator';
  this.db
    .prepare(`
      UPDATE notification_tasks
      SET status = 'ignored', error_message = ?
      WHERE fingerprint = ?
    `)
    .run(storedReason, fingerprint);
}
|
||||
|
||||
/**
 * Set a task's status directly and adjust the dependent columns in the same
 * UPDATE:
 *
 * - `last_attempt_at` is set to now for `pending`, `failed` and `replied`.
 * - `replied_at` is set to now for `replied`.
 * - `error_message` becomes `note` (or a default text) for `failed` and
 *   `ignored`; otherwise it is left as is.
 * - `reply_content` becomes `note` for `replied` when a note is given.
 *
 * Unlike `markFailed`, this does not touch `retry_count`.
 *
 * @param fingerprint - Task to update.
 * @param status - New status.
 * @param note - Optional text used as error message or reply content.
 */
setStatus(
  fingerprint: string,
  status: NotificationTaskStatus,
  note?: string,
): void {
  const changedAt = Date.now();
  const noteOrNull = note ?? null;

  const update = this.db.prepare(`
    UPDATE notification_tasks
    SET
      status = ?,
      last_attempt_at = CASE WHEN ? IN ('pending', 'failed', 'replied') THEN ? ELSE last_attempt_at END,
      replied_at = CASE WHEN ? = 'replied' THEN ? ELSE replied_at END,
      error_message = CASE WHEN ? = 'failed' THEN COALESCE(?, 'Marked as failed') WHEN ? = 'ignored' THEN COALESCE(?, 'Ignored by operator') ELSE error_message END,
      reply_content = CASE WHEN ? = 'replied' THEN COALESCE(?, reply_content) ELSE reply_content END
    WHERE fingerprint = ?
  `);

  // Positional bindings, grouped by the SET clause that consumes them.
  update.run(
    status, // status = ?
    status, changedAt, // last_attempt_at
    status, changedAt, // replied_at
    status, noteOrNull, status, noteOrNull, // error_message
    status, noteOrNull, // reply_content
    fingerprint, // WHERE
  );
}
|
||||
|
||||
/**
 * Convert a database row into the task shape exposed to callers.
 *
 * Epoch-millisecond columns become ISO-8601 strings. The optional fields
 * (`lastAttemptAt`, `repliedAt`, `replyContent`, `errorMessage`) are only
 * present when the corresponding column holds a truthy value.
 *
 * @param row - Row selected from `notification_tasks`.
 * @returns The equivalent task object.
 */
private rowToTask(row: NotificationRow): NotificationTask {
  const toIso = (epochMs: number): string => new Date(epochMs).toISOString();
  const {
    last_attempt_at: lastAttempt,
    replied_at: replied,
    reply_content: reply,
    error_message: error,
  } = row;

  const attemptPart = lastAttempt ? { lastAttemptAt: toIso(lastAttempt) } : {};
  const repliedPart = replied ? { repliedAt: toIso(replied) } : {};
  const replyPart = reply ? { replyContent: reply } : {};
  const errorPart = error ? { errorMessage: error } : {};

  return {
    fingerprint: row.fingerprint,
    notification: {
      userId: row.user_id,
      nickname: row.nickname,
      avatar: row.avatar,
      content: row.content,
      type: row.type,
      time: row.time,
      feedId: row.feed_id,
      xsecToken: row.xsec_token,
      noteImage: row.note_image,
    },
    status: row.status,
    firstSeenAt: toIso(row.first_seen_at),
    lastSeenAt: toIso(row.last_seen_at),
    retryCount: row.retry_count,
    ...attemptPart,
    ...repliedPart,
    ...replyPart,
    ...errorPart,
  };
}
|
||||
}
|
||||
|
||||
/** Module-wide store instance, created on first use. */
let storeSingleton: NotificationStateStore | null = null;

/**
 * Return the shared notification state store, constructing it on the first
 * call and reusing it afterwards.
 */
export function getNotificationStateStore(): NotificationStateStore {
  if (storeSingleton === null) {
    storeSingleton = new NotificationStateStore();
  }
  return storeSingleton;
}
|
||||
@@ -0,0 +1,90 @@
|
||||
import type { BrowserManager } from '@social/core/browser/manager.js';
|
||||
import { config } from '@social/core/config/index.js';
|
||||
import { logger } from '@social/core/utils/logger.js';
|
||||
import { getCommentNotifications } from './notification.js';
|
||||
import { getNotificationStateStore, type NotificationUpsertResult } from './notification-state.js';
|
||||
|
||||
const PLATFORM = 'xiaohongshu';
|
||||
const log = logger.child({ module: 'xhs-notification-sync' });
|
||||
|
||||
/**
 * Scrape comment notifications with a browser page and store them.
 *
 * The page operation is bounded by the `feed_detail` timeout from config,
 * falling back to the `default` timeout and finally to 60 seconds.
 *
 * @param browser - Browser manager used to obtain a page for the platform.
 * @param maxCount - Upper bound on notifications fetched per sync.
 * @returns Counts of fetched, inserted and updated notifications.
 */
export async function syncCommentNotifications(
  browser: BrowserManager,
  maxCount = config.notificationPollMaxCount,
): Promise<NotificationUpsertResult> {
  const { operationTimeouts } = config;
  const timeoutMs =
    operationTimeouts['feed_detail'] ?? operationTimeouts['default'] ?? 60_000;

  const scraped = await browser.withPage(
    PLATFORM,
    (page) => getCommentNotifications(page, maxCount),
    timeoutMs,
  );

  const store = getNotificationStateStore();
  const summary = store.upsertNotifications(scraped);

  // Stay quiet on empty polls; they are the common case.
  if (summary.fetched > 0) {
    log.info(summary, 'Notifications synced to state store');
  }
  return summary;
}
|
||||
|
||||
/**
 * Periodically syncs Xiaohongshu comment notifications into the state store.
 *
 * `start()` runs one sync immediately and then one per
 * `config.notificationPollIntervalMs`. A tick is skipped while the previous
 * one is still running. Errors from a tick are logged at debug level and do
 * not stop the poller.
 */
export class XhsNotificationPoller {
  /** Interval handle while polling is active. */
  private timer: ReturnType<typeof setInterval> | null = null;
  /** True while a sync is in flight; prevents overlapping ticks. */
  private running = false;
  /** True between `start()` and `stop()`, even when polling is disabled by config. */
  private started = false;

  /**
   * Begin polling. Calling it again before `stop()` is a no-op.
   *
   * When `config.notificationPollEnabled` is false nothing is scheduled.
   *
   * @param browser - Browser manager handed to every sync.
   */
  start(browser: BrowserManager): void {
    if (this.started) return;
    this.started = true;

    if (!config.notificationPollEnabled) {
      log.info('Notification poller disabled by config');
      return;
    }

    const tick = async (): Promise<void> => {
      if (this.running) return;
      this.running = true;

      try {
        await syncCommentNotifications(browser, config.notificationPollMaxCount);
      } catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        // Login expiration and empty unread state are expected in daily usage.
        log.debug({ message }, 'Notification poll tick skipped');
      } finally {
        this.running = false;
      }
    };

    void tick();
    this.timer = setInterval(() => {
      void tick();
    }, config.notificationPollIntervalMs);

    // Do not keep the process alive just for polling.
    if (this.timer && typeof this.timer === 'object' && 'unref' in this.timer) {
      this.timer.unref();
    }

    log.info(
      {
        intervalMs: config.notificationPollIntervalMs,
        maxCount: config.notificationPollMaxCount,
      },
      'Notification poller started',
    );
  }

  /**
   * Stop scheduling further ticks and allow `start()` to be called again.
   *
   * A tick that is already in flight is not cancelled. `running` is
   * deliberately left alone: the in-flight tick clears it in its own
   * `finally`, so a `start()` issued right after `stop()` cannot launch a
   * second sync on top of the one still running.
   */
  stop(): void {
    if (this.timer) {
      clearInterval(this.timer);
      this.timer = null;
    }
    this.started = false;
    log.info('Notification poller stopped');
  }
}
|
||||
|
||||
export const xhsNotificationPoller = new XhsNotificationPoller();
|
||||
@@ -0,0 +1,220 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '@social/core/utils/logger.js';
|
||||
import { XHS_SELECTORS } from './selectors.js';
|
||||
import type { CommentNotification } from './types.js';
|
||||
|
||||
const log = logger.child({ module: 'xhs-notification' });
|
||||
const sel = XHS_SELECTORS.notification;
|
||||
|
||||
const EXPLORE_URL = 'https://www.xiaohongshu.com/explore';
|
||||
const NOTIFICATION_URL = 'https://www.xiaohongshu.com/notification';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helper: extract userId from avatar href like /user/profile/xxx?xsecToken=yyy
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Extract the user id and xsec token from a profile link.
 *
 * The user id is the last non-empty path segment, so a trailing slash does
 * not produce an empty id. The token is read from the `xsecToken` query
 * parameter, falling back to the snake_case `xsec_token` spelling. A href
 * that cannot be parsed yields empty strings instead of throwing, so one bad
 * link cannot abort a whole scrape.
 *
 * @param href - Absolute or site-relative link to a user profile.
 * @returns The parsed id and token; either may be `''` when absent.
 */
function parseUserHref(href: string): { userId: string; xsecToken: string } {
  let url: URL;
  try {
    url = new URL(href, 'https://www.xiaohongshu.com');
  } catch {
    return { userId: '', xsecToken: '' };
  }

  const segments = url.pathname.split('/').filter((segment) => segment.length > 0);
  const userId = segments[segments.length - 1] ?? '';
  const xsecToken =
    url.searchParams.get('xsecToken') || url.searchParams.get('xsec_token') || '';
  return { userId, xsecToken };
}
|
||||
|
||||
/**
 * Extract the feed (note) id and xsec token from a note link.
 *
 * The feed id is the last non-empty path segment, so a trailing slash does
 * not produce an empty id. The token is read from the `xsecToken` query
 * parameter, falling back to the snake_case `xsec_token` spelling. A href
 * that cannot be parsed yields empty strings instead of throwing, so one bad
 * link cannot abort a whole scrape.
 *
 * @param href - Absolute or site-relative link to a note.
 * @returns The parsed id and token; either may be `''` when absent.
 */
function parseFeedHref(href: string): { feedId: string; xsecToken: string } {
  let url: URL;
  try {
    url = new URL(href, 'https://www.xiaohongshu.com');
  } catch {
    return { feedId: '', xsecToken: '' };
  }

  const segments = url.pathname.split('/').filter((segment) => segment.length > 0);
  const feedId = segments[segments.length - 1] ?? '';
  const xsecToken =
    url.searchParams.get('xsecToken') || url.searchParams.get('xsec_token') || '';
  return { feedId, xsecToken };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// getUnreadCount — read the badge number from the explore page bottom menu
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Read the unread-notification badge shown on the explore page.
 *
 * Navigates to the explore page first unless the current URL already
 * contains `/explore`.
 *
 * @param page - Page to inspect.
 * @returns The badge number, or 0 when the badge is missing or its text does
 *   not parse to a positive integer.
 */
export async function getUnreadCount(page: Page): Promise<number> {
  const alreadyOnExplore = page.url().includes('/explore');
  if (!alreadyOnExplore) {
    await page.goto(EXPLORE_URL, { waitUntil: 'domcontentloaded' });
    await page.waitForTimeout(1500);
  }

  const badgeEl = await page.$(sel.unreadBadge);
  if (badgeEl === null) {
    log.info('No unread badge found');
    return 0;
  }

  const rawText = await badgeEl.textContent();
  const badgeText = rawText?.trim() || '';
  const unread = Number.parseInt(badgeText, 10);

  // `!(unread > 0)` is also true for NaN, covering unparsable text.
  if (!(unread > 0)) {
    log.info({ badgeText }, 'Unread badge text is not a positive number');
    return 0;
  }

  log.info({ unreadCount: unread }, 'Unread notification count');
  return unread;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// getCommentNotifications
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Scrape comment and @-mention notifications from the notification page.
 *
 * The unread badge is checked first; when it reports 0 the notification page
 * is not visited and an empty list is returned. Otherwise at most
 * `min(unreadCount, maxCount)` entries are collected, in page order, keeping
 * only those whose hint text contains "评论" or "@".
 *
 * @param page - Page to drive.
 * @param maxCount - Upper bound on returned notifications (default 20).
 * @returns The extracted notifications; missing fields are empty strings.
 */
export async function getCommentNotifications(
  page: Page,
  maxCount = 20,
): Promise<CommentNotification[]> {
  const unreadCount = await getUnreadCount(page);
  if (unreadCount === 0) {
    log.info('No unread notifications, returning empty');
    return [];
  }

  const limit = Math.min(unreadCount, maxCount);
  log.info({ unreadCount, maxCount, limit }, 'Fetching comment notifications');

  await page.goto(NOTIFICATION_URL, { waitUntil: 'domcontentloaded' });
  await page.waitForSelector(sel.container, { timeout: 15_000 });
  // Give the list a moment to finish rendering.
  await page.waitForTimeout(1000);

  const rows = await page.$$(sel.container);
  log.info({ count: rows.length }, 'Found notification containers');

  type Row = (typeof rows)[number];

  /** Trimmed text of the first match inside `row`, or '' when absent. */
  const trimmedText = async (row: Row, selector: string): Promise<string> => {
    const el = await row.$(selector);
    if (!el) return '';
    return (await el.textContent())?.trim() || '';
  };

  const collected: CommentNotification[] = [];

  for (const row of rows) {
    if (collected.length >= limit) break;

    // Only comment / @ notifications are of interest.
    const hintText = await trimmedText(row, sel.interactionHint);
    const isCommentLike = hintText.includes('评论') || hintText.includes('@');
    if (!isCommentLike) continue;

    // Commenter: profile link plus the avatar image inside it.
    const profileLink = await row.$(sel.userAvatar);
    const profileHref = profileLink ? await profileLink.getAttribute('href') : '';
    const avatarEl = profileLink ? await profileLink.$('img') : null;
    const avatar = avatarEl ? (await avatarEl.getAttribute('src')) || '' : '';
    const userId = profileHref ? parseUserHref(profileHref).userId : '';

    const nickname = await trimmedText(row, sel.userName);
    const content = await trimmedText(row, sel.interactionContent);
    const time = await trimmedText(row, sel.interactionTime);

    // Note: thumbnail image and the anchor wrapping it.
    const thumbEl = await row.$(sel.extraImage);
    const noteImage = thumbEl ? (await thumbEl.getAttribute('src')) || '' : '';
    const noteHref = thumbEl
      ? await thumbEl.evaluate((el) => {
          const anchor = el.closest('a');
          return anchor ? anchor.href : '';
        })
      : '';
    const { feedId, xsecToken } = noteHref
      ? parseFeedHref(noteHref)
      : { feedId: '', xsecToken: '' };

    collected.push({
      userId,
      nickname,
      avatar,
      content,
      type: hintText,
      time,
      feedId,
      xsecToken,
      noteImage,
    });
  }

  log.info({ resultCount: collected.length }, 'Comment notifications extracted');
  return collected;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// replyNotification
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Reply to a notification from the notification page.
 *
 * The target entry is the first one whose avatar link resolves to `userId`
 * and whose trimmed content equals `commentContent`. The function then
 * clicks its reply button, fills the reply input and clicks submit.
 *
 * Failing to find the entry or any of the reply controls — including the
 * reply input not appearing within 5 seconds — is reported as
 * `{ success: false }`. Navigation and click errors still propagate.
 *
 * @param page - Page to drive.
 * @param userId - Id of the user whose comment is being answered.
 * @param commentContent - Exact (trimmed) text of that comment.
 * @param replyContent - Text to send.
 * @returns `{ success: true }` once submit was clicked; no confirmation from
 *   the site is checked.
 */
export async function replyNotification(
  page: Page,
  userId: string,
  commentContent: string,
  replyContent: string,
): Promise<{ success: boolean }> {
  log.info({ userId, commentContent: commentContent.slice(0, 30) }, 'Replying to notification');

  await page.goto(NOTIFICATION_URL, { waitUntil: 'domcontentloaded' });
  await page.waitForSelector(sel.container, { timeout: 15_000 });
  await page.waitForTimeout(1000);

  const containers = await page.$$(sel.container);

  let targetContainer: Awaited<ReturnType<Page['$']>> = null;

  for (const container of containers) {
    // Match by userId
    const avatarLink = await container.$(sel.userAvatar);
    const avatarHref = avatarLink ? await avatarLink.getAttribute('href') : '';
    const { userId: uid } = avatarHref ? parseUserHref(avatarHref) : { userId: '' };
    if (uid !== userId) continue;

    // Match by comment content
    const contentEl = await container.$(sel.interactionContent);
    const content = contentEl ? (await contentEl.textContent())?.trim() || '' : '';
    if (content !== commentContent) continue;

    targetContainer = container;
    break;
  }

  if (!targetContainer) {
    log.warn({ userId, commentContent }, 'Target notification not found');
    return { success: false };
  }

  // Click the reply button
  const replyBtn = await targetContainer.$(sel.replyButton);
  if (!replyBtn) {
    log.warn('Reply button not found on target notification');
    return { success: false };
  }
  await replyBtn.click();

  // waitForSelector rejects on timeout rather than resolving to null, so the
  // rejection is mapped to null to reach the failure branch below.
  const textarea = await targetContainer
    .waitForSelector(sel.replyInput, { timeout: 5_000 })
    .catch(() => null);
  if (!textarea) {
    log.warn('Reply textarea did not appear');
    return { success: false };
  }

  // Type the reply
  await textarea.fill(replyContent);
  await page.waitForTimeout(300);

  // Click submit
  const submitBtn = await targetContainer.$(sel.replySubmit);
  if (!submitBtn) {
    log.warn('Reply submit button not found');
    return { success: false };
  }
  await submitBtn.click();
  await page.waitForTimeout(1000);

  log.info('Reply submitted successfully');
  return { success: true };
}
|
||||
@@ -0,0 +1,314 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '@social/core/utils/logger.js';
|
||||
import { XHS_SELECTORS } from './selectors.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const CREATOR_PUBLISH_URL = 'https://creator.xiaohongshu.com/publish/publish';
|
||||
|
||||
/**
|
||||
* Maximum time to wait for video upload to complete.
|
||||
* Videos can be up to 500 MB, so we allow up to 4 minutes for upload.
|
||||
*/
|
||||
const VIDEO_UPLOAD_TIMEOUT_MS = 240_000;
|
||||
|
||||
/** Wait after upload completes to let the UI settle. */
|
||||
const UPLOAD_SETTLE_MS = 2_000;
|
||||
|
||||
/** Wait after filling form fields. */
|
||||
const FIELD_SETTLE_MS = 500;
|
||||
|
||||
/** Wait after clicking publish before checking result. */
|
||||
const PUBLISH_SETTLE_MS = 3_000;
|
||||
|
||||
const sel = XHS_SELECTORS.publish;
|
||||
|
||||
const log = logger.child({ module: 'xhs-publish-video' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// publishVideoNote
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Optional settings applied while publishing a video note. */
export interface PublishVideoOptions {
  /** Tags to add; each one is typed into the content editor as `#tag`. */
  tags?: string[];

  /** Publish time, typed verbatim into the schedule input when provided. */
  scheduleAt?: string;

  /**
   * Requested visibility. `'private'` and `'friends'` select the matching
   * option; `'public'` or an omitted value leaves the page untouched.
   */
  visibility?: string;
}
|
||||
|
||||
/**
 * Publish a video note through the Xiaohongshu creator-center page.
 *
 * Steps, in order: open the publish page, upload the video through the file
 * input, type the title and the body, then apply tags, visibility and
 * schedule when requested, click publish and wait for a success indicator.
 *
 * @param page - Playwright page to drive.
 * @param title - Note title, typed into the title field.
 * @param content - Note body, typed into the content editor.
 * @param videoPath - Local path of the video file to upload.
 * @param options - Optional tags, schedule and visibility.
 * @returns Success flag plus `noteId` / `noteUrl` when an id can be read from
 *   the final URL.
 */
export async function publishVideoNote(
  page: Page,
  title: string,
  content: string,
  videoPath: string,
  options?: PublishVideoOptions,
): Promise<{ success: boolean; noteId?: string; noteUrl?: string }> {
  log.info({ hasOptions: !!options }, 'Starting video note publish');

  // --- 1. Open the creator publish page and let the SPA hydrate -----------
  await page.goto(CREATOR_PUBLISH_URL, { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(2_000);

  // --- 2. Upload the video -------------------------------------------------
  // The same file input serves images and videos on this page.
  const uploadInput = await page.waitForSelector(sel.imageFileInput, { timeout: 10_000 });
  await uploadInput.setInputFiles(videoPath);
  log.debug('Video file set on input element, waiting for upload to complete');

  // The uploaded-video element appearing marks the end of the upload.
  await page.waitForSelector(sel.uploadedVideoItem, { timeout: VIDEO_UPLOAD_TIMEOUT_MS });
  await page.waitForTimeout(UPLOAD_SETTLE_MS);
  log.debug('Video uploaded successfully');

  // --- 3. Title --------------------------------------------------------------
  const titleField = await page.waitForSelector(sel.titleInput, { timeout: 5_000 });
  await titleField.click();
  await titleField.fill('');
  await page.keyboard.type(title, { delay: 30 });
  await page.waitForTimeout(FIELD_SETTLE_MS);

  // --- 4. Body ---------------------------------------------------------------
  const bodyEditor = await page.waitForSelector(sel.contentEditor, { timeout: 5_000 });
  await bodyEditor.click();
  await page.keyboard.type(content, { delay: 20 });
  await page.waitForTimeout(FIELD_SETTLE_MS);

  // --- 5-7. Optional tags, visibility, schedule ----------------------------
  const tags = options?.tags;
  if (tags && tags.length > 0) {
    await addTags(page, tags);
  }

  const visibility = options?.visibility;
  if (visibility && visibility !== 'public') {
    await setVisibility(page, visibility);
  }

  const scheduleAt = options?.scheduleAt;
  if (scheduleAt) {
    await setSchedule(page, scheduleAt);
  }

  // --- 8. Publish ------------------------------------------------------------
  const publishControl = await page.waitForSelector(sel.publishButton, { timeout: 5_000 });
  await publishControl.click();
  log.debug('Publish button clicked, waiting for success');

  // --- 9. Outcome ------------------------------------------------------------
  const outcome = await waitForPublishResult(page);
  log.info({ result: outcome }, 'Video note publish complete');
  return outcome;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal helpers (shared patterns with publish.ts — kept separate to
|
||||
// avoid circular imports and keep each module self-contained)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Add each tag by typing `#tag` into the content editor.
 *
 * After typing, the first tag suggestion is clicked if one shows up within
 * 3 seconds; otherwise Enter is pressed.
 */
async function addTags(page: Page, tags: string[]): Promise<void> {
  for (const tagName of tags) {
    // Re-focus the editor for every tag when it can be found.
    const editorEl = await page.$(sel.contentEditor);
    if (editorEl) {
      await editorEl.click();
      await page.waitForTimeout(300);
    }

    await page.keyboard.type(`#${tagName}`, { delay: 50 });
    await page.waitForTimeout(800);

    const firstSuggestion = await page
      .waitForSelector(sel.tagSuggestionItem, { timeout: 3_000 })
      .catch(() => null);

    if (!firstSuggestion) {
      await page.keyboard.press('Enter');
    } else {
      await firstSuggestion.click();
    }

    await page.waitForTimeout(FIELD_SETTLE_MS);
  }
}
|
||||
|
||||
/**
 * Open the visibility control and pick the requested option.
 *
 * `'private'` and `'friends'` map to their own options; any other value maps
 * to the public option. A missing control or option is logged and skipped.
 */
async function setVisibility(page: Page, visibility: string): Promise<void> {
  const toggle = await page.$(sel.visibilityButton);
  if (toggle === null) {
    log.debug('Visibility button not found, skipping');
    return;
  }

  await toggle.click();
  await page.waitForTimeout(FIELD_SETTLE_MS);

  const optionSelector =
    visibility === 'private'
      ? sel.visibilityPrivate
      : visibility === 'friends'
        ? sel.visibilityFriends
        : sel.visibilityPublic;

  const choice = await page.$(optionSelector);
  if (!choice) {
    log.debug({ visibility }, 'Visibility option not found');
    return;
  }

  await choice.click();
  await page.waitForTimeout(FIELD_SETTLE_MS);
}
|
||||
|
||||
/**
 * Open the schedule picker and type the requested publish time.
 *
 * The value is typed verbatim and confirmed with Enter. A missing button or
 * input is logged and skipped.
 */
async function setSchedule(page: Page, scheduleAt: string): Promise<void> {
  const opener = await page.$(sel.scheduleButton);
  if (opener === null) {
    log.debug('Schedule button not found, skipping');
    return;
  }

  await opener.click();
  await page.waitForTimeout(FIELD_SETTLE_MS);

  const timeField = await page.$(sel.scheduleInput);
  if (!timeField) {
    log.debug('Schedule input not found');
    return;
  }

  await timeField.click();
  await timeField.fill('');
  await page.keyboard.type(scheduleAt, { delay: 30 });
  await page.keyboard.press('Enter');
  await page.waitForTimeout(FIELD_SETTLE_MS);
}
|
||||
|
||||
/**
 * Decide whether the publish click succeeded.
 *
 * Two watchers start together, each with a 30 second timeout: one for the
 * success URL pattern and one for the success element. Both are awaited to
 * completion. If neither fired, the page HTML is searched for the success
 * texts as a last resort.
 *
 * @returns `{ success: false }` when no indicator is found; otherwise success
 *   plus `noteId` / `noteUrl` when an id can be read from the current URL.
 */
async function waitForPublishResult(
  page: Page,
): Promise<{ success: boolean; noteId?: string; noteUrl?: string }> {
  /** Turn "resolved vs rejected" into a boolean without ever rejecting. */
  const toFlag = (pending: Promise<unknown>): Promise<boolean> =>
    pending.then(
      () => true,
      () => false,
    );

  const urlWatcher = toFlag(page.waitForURL(sel.publishSuccessUrlPattern, { timeout: 30_000 }));
  const elementWatcher = toFlag(page.waitForSelector(sel.publishSuccess, { timeout: 30_000 }));

  await page.waitForTimeout(PUBLISH_SETTLE_MS);

  const [urlChanged, elementAppeared] = await Promise.all([urlWatcher, elementWatcher]);

  if (!urlChanged && !elementAppeared) {
    const html = await page.content();
    if (!html.includes('发布成功') && !html.includes('已发布')) {
      log.warn('No success indicator found after video publish');
      return { success: false };
    }
  }

  const noteId = extractNoteIdFromUrl(page.url());
  const noteUrl = noteId ? `https://www.xiaohongshu.com/explore/${noteId}` : undefined;

  return { success: true, noteId, noteUrl };
}
|
||||
|
||||
/**
 * Try to read a note ID out of a URL.
 *
 * Looks first at the `noteId` query parameter, then at a `/note/<id>` path
 * segment where `<id>` is lowercase hexadecimal. The path match must cover
 * the whole segment, so `/note/abcXYZ` yields `undefined` rather than the
 * truncated prefix `abc`.
 *
 * @param url - URL to inspect.
 * @returns The note ID, or `undefined` when the URL cannot be parsed or
 *   contains no ID.
 */
function extractNoteIdFromUrl(url: string): string | undefined {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    // URL parsing failed — noteId is optional.
    return undefined;
  }

  const fromQuery = parsed.searchParams.get('noteId');
  if (fromQuery) return fromQuery;

  // The lookahead anchors the ID to the end of its path segment.
  const pathMatch = /\/note\/([a-f0-9]+)(?=\/|$)/.exec(parsed.pathname);
  return pathMatch?.[1];
}
|
||||
@@ -0,0 +1,417 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '@social/core/utils/logger.js';
|
||||
import { XHS_SELECTORS } from './selectors.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Main-site explore page, visited before the creator center is opened. */
const MAIN_EXPLORE_URL = 'https://www.xiaohongshu.com/explore';

/** Creator-center publish page. */
const CREATOR_PUBLISH_URL = 'https://creator.xiaohongshu.com/publish/publish?source=official';

/** Upper bound, in milliseconds, on waiting for image uploads (60 seconds). */
const UPLOAD_WAIT_TIMEOUT_MS = 60 * 1_000;

/** Delay, in milliseconds, between checks for upload completion. */
const UPLOAD_POLL_INTERVAL_MS = 500;

/** Pause, in milliseconds, after uploads complete so the UI can settle. */
const UPLOAD_SETTLE_MS = 1_500;

/** Pause, in milliseconds, after a form field is filled (debounce / auto-save). */
const FIELD_SETTLE_MS = 500;

/** Pause, in milliseconds, after clicking publish before reading the result. */
const PUBLISH_SETTLE_MS = 3 * 1_000;

/** Selectors for the publish form. */
const sel = XHS_SELECTORS.publish;

/** Module-scoped child logger tagged `xhs-publish`. */
const log = logger.child({ module: 'xhs-publish' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// publishImageNote
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Optional settings applied to an image note before it is published. */
export interface PublishImageOptions {
  /** Tags to add; each is typed into the content editor after a `#`. */
  tags?: string[];
  /** When true, the "original content" checkbox is ticked if present. */
  isOriginal?: boolean;
  /**
   * Visibility to select. `'private'` and `'friends'` pick their matching
   * option; `'public'` or an omitted value leaves the form untouched.
   */
  visibility?: string;
  /** Publish time text typed into the schedule input. */
  scheduleAt?: string;
}
|
||||
|
||||
/**
 * Publish an image note through the Xiaohongshu creator-center web UI.
 *
 * The flow is strictly sequential: open the publish page, switch to the
 * image tab, upload the files, fill in title and body, apply the optional
 * settings, click publish and wait for a success indicator.
 *
 * @param page - Playwright page to drive.
 * @param title - Note title typed into the title field.
 * @param content - Note body typed into the content editor.
 * @param imagePaths - Local image file paths handed to the file input.
 * @param options - Optional tags, schedule, original flag and visibility.
 * @returns The publish outcome, with `noteId` / `noteUrl` when an ID can be
 *   read from the final URL.
 * @throws Error when the creator center redirects to a login URL or when the
 *   uploaded thumbnails do not all appear in time; selector waits also
 *   reject when a required control does not appear within its timeout.
 */
export async function publishImageNote(
  page: Page,
  title: string,
  content: string,
  imagePaths: string[],
  options?: PublishImageOptions,
): Promise<{ success: boolean; noteId?: string; noteUrl?: string }> {
  log.info({ imageCount: imagePaths.length, hasOptions: !!options }, 'Starting image note publish');

  // --- Step 1: open the creator publish page --------------------------------
  // Going straight to creator.xiaohongshu.com yields a 401 in headless mode.
  // Loading the main site first sets up the session so the creator center
  // accepts the shared .xiaohongshu.com cookies.
  await page.goto(MAIN_EXPLORE_URL, { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(1_000);

  await page.goto(CREATOR_PUBLISH_URL, { waitUntil: 'domcontentloaded' });
  // Give the SPA time to hydrate and render its tabs.
  await page.waitForTimeout(2_000);

  const landedOnLogin = page.url().includes('/login');
  if (landedOnLogin) {
    throw new Error('Creator center redirected to login — cookies may be expired');
  }

  // --- Step 2: switch to the image tab (the page opens on video upload) -----
  // One of the image tabs is an off-screen duplicate (left:-9999px); only the
  // on-screen one may be clicked.
  const switchedTab = await page.$$eval('.creator-tab', (tabs) => {
    const visibleImageTab = tabs.find((candidate) => {
      if (!candidate.textContent?.includes('图文')) return false;
      const left = (candidate as HTMLElement).style.left;
      const isOffScreen = left && parseInt(left, 10) < -100;
      return !isOffScreen;
    });
    if (!visibleImageTab) return false;
    (visibleImageTab as HTMLElement).click();
    return true;
  });

  if (!switchedTab) {
    log.debug('Image tab not found — assuming page is already in image mode');
  } else {
    await page.waitForTimeout(1_000);
    log.debug('Switched to image note tab');
  }

  // --- Step 3: upload the images --------------------------------------------
  // The file input is hidden, so wait for attachment rather than visibility;
  // setInputFiles accepts every path in one call.
  const uploadInput = await page.waitForSelector(sel.imageFileInput, {
    state: 'attached',
    timeout: 10_000,
  });
  await uploadInput.setInputFiles(imagePaths);
  log.debug({ count: imagePaths.length }, 'Files set on input element');

  // One thumbnail is expected per uploaded image.
  await waitForUploadedImages(page, imagePaths.length);
  await page.waitForTimeout(UPLOAD_SETTLE_MS);
  log.debug('All images uploaded successfully');

  // --- Step 4: title ----------------------------------------------------------
  const titleField = await page.waitForSelector(sel.titleInput, { timeout: 5_000 });
  await titleField.click();
  await titleField.fill('');
  await page.keyboard.type(title, { delay: 30 });
  await page.waitForTimeout(FIELD_SETTLE_MS);

  // --- Step 5: body text ------------------------------------------------------
  const bodyEditor = await page.waitForSelector(sel.contentEditor, { timeout: 5_000 });
  await bodyEditor.click();
  await page.keyboard.type(content, { delay: 20 });
  await page.waitForTimeout(FIELD_SETTLE_MS);

  // --- Step 6: tags (optional) ------------------------------------------------
  if (options?.tags && options.tags.length > 0) {
    await addTags(page, options.tags);
  }

  // --- Step 7: original flag (optional) ---------------------------------------
  if (options?.isOriginal) {
    await setOriginal(page);
  }

  // --- Step 8: visibility (optional; 'public' needs no action) ----------------
  if (options?.visibility && options.visibility !== 'public') {
    await setVisibility(page, options.visibility);
  }

  // --- Step 9: schedule (optional) --------------------------------------------
  if (options?.scheduleAt) {
    await setSchedule(page, options.scheduleAt);
  }

  // --- Step 10: click publish -------------------------------------------------
  const submitButton = await page.waitForSelector(sel.publishButton, { timeout: 5_000 });
  await submitButton.click();
  log.debug('Publish button clicked, waiting for success');

  // --- Step 11: wait for a success indicator ----------------------------------
  const result = await waitForPublishResult(page);
  log.info({ result }, 'Image note publish complete');
  return result;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Poll the page until at least `expectedCount` uploaded-image thumbnails
 * (`sel.uploadedImageItem`) are present.
 *
 * @param page - Playwright page showing the publish form.
 * @param expectedCount - Number of thumbnails to wait for.
 * @throws Error when the thumbnails have not appeared within
 *   `UPLOAD_WAIT_TIMEOUT_MS`.
 */
async function waitForUploadedImages(page: Page, expectedCount: number): Promise<void> {
  const deadline = Date.now() + UPLOAD_WAIT_TIMEOUT_MS;

  // Re-read the clock before every attempt.
  for (let now = Date.now(); now < deadline; now = Date.now()) {
    const thumbnails = await page.$$(sel.uploadedImageItem);
    if (thumbnails.length >= expectedCount) return;
    await page.waitForTimeout(UPLOAD_POLL_INTERVAL_MS);
  }

  throw new Error(
    `Timed out waiting for ${String(expectedCount)} uploaded images after ${String(UPLOAD_WAIT_TIMEOUT_MS)}ms`,
  );
}
|
||||
|
||||
/**
 * Add topic tags by typing `#<tag>` into the content editor.
 *
 * Each tag is normalised first: surrounding whitespace and any leading `#`
 * (ASCII or full-width) are removed, so callers may pass `"travel"` or
 * `"#travel"` and the editor always receives a single `#`. Tags that are
 * empty after normalisation are skipped. For each remaining tag the first
 * suggestion is clicked if one appears within 3 s; otherwise Enter is pressed.
 *
 * @param page - Playwright page showing the publish form.
 * @param tags - Tag texts, with or without a leading `#`.
 */
async function addTags(page: Page, tags: string[]): Promise<void> {
  for (const rawTag of tags) {
    const tag = rawTag.trim().replace(/^[#＃]+/, '').trim();
    if (!tag) {
      log.debug({ rawTag }, 'Skipping empty tag');
      continue;
    }

    // Re-focus the editor so the keystrokes land in the note body.
    const editor = await page.$(sel.contentEditor);
    if (editor) {
      await editor.click();
      await page.waitForTimeout(300);
    }

    // Typing the hashtag prefix triggers the topic dropdown.
    await page.keyboard.type(`#${tag}`, { delay: 50 });
    await page.waitForTimeout(800);

    // Prefer the first suggestion; fall back to Enter when none shows up.
    const suggestion = await page
      .waitForSelector(sel.tagSuggestionItem, { timeout: 3_000 })
      .catch(() => null);

    if (suggestion) {
      await suggestion.click();
    } else {
      await page.keyboard.press('Enter');
    }

    await page.waitForTimeout(FIELD_SETTLE_MS);
  }
}
|
||||
|
||||
/**
 * Tick the "original content" checkbox when it exists and is not yet ticked.
 *
 * @param page - Playwright page showing the publish form.
 */
async function setOriginal(page: Page): Promise<void> {
  const originalBox = await page.$(sel.originalCheckbox);
  if (!originalBox) {
    log.debug('Original checkbox not found, skipping');
    return;
  }

  const alreadyTicked = await originalBox.isChecked();
  if (alreadyTicked) return;

  await originalBox.click();
  await page.waitForTimeout(FIELD_SETTLE_MS);
}
|
||||
|
||||
/**
 * Open the visibility menu and pick the option for `visibility`.
 *
 * `'private'` and `'friends'` map to their own options; any other value
 * selects the public option. The function is best-effort: when the menu
 * button or the option cannot be found it returns without changing anything.
 * Those cases are logged at `warn` (not `debug`) because the requested
 * visibility is then not applied.
 *
 * @param page - Playwright page showing the publish form.
 * @param visibility - Requested visibility.
 */
async function setVisibility(page: Page, visibility: string): Promise<void> {
  const visBtn = await page.$(sel.visibilityButton);
  if (!visBtn) {
    log.warn({ visibility }, 'Visibility button not found, skipping');
    return;
  }

  await visBtn.click();
  await page.waitForTimeout(FIELD_SETTLE_MS);

  let optionSelector: string;
  switch (visibility) {
    case 'private':
      optionSelector = sel.visibilityPrivate;
      break;
    case 'friends':
      optionSelector = sel.visibilityFriends;
      break;
    default:
      optionSelector = sel.visibilityPublic;
      break;
  }

  const option = await page.$(optionSelector);
  if (!option) {
    log.warn({ visibility }, 'Visibility option not found');
    return;
  }

  await option.click();
  await page.waitForTimeout(FIELD_SETTLE_MS);
}
|
||||
|
||||
/**
 * Open the schedule picker and enter the requested publish time.
 *
 * The function is best-effort: when the schedule button or the schedule
 * input cannot be found it returns without scheduling, and the caller goes
 * on to click publish. Those cases are logged at `warn` (not `debug`)
 * because the note is then not scheduled as requested.
 *
 * NOTE(review): `scheduleAt` is typed verbatim into the input — confirm the
 * picker accepts the format callers pass.
 *
 * @param page - Playwright page showing the publish form.
 * @param scheduleAt - Date/time text to type into the schedule input.
 */
async function setSchedule(page: Page, scheduleAt: string): Promise<void> {
  const scheduleBtn = await page.$(sel.scheduleButton);
  if (!scheduleBtn) {
    log.warn({ scheduleAt }, 'Schedule button not found, skipping');
    return;
  }

  await scheduleBtn.click();
  await page.waitForTimeout(FIELD_SETTLE_MS);

  const scheduleInput = await page.$(sel.scheduleInput);
  if (!scheduleInput) {
    log.warn({ scheduleAt }, 'Schedule input not found');
    return;
  }

  // Focus and clear the field, then type so the picker sees real key events.
  await scheduleInput.click();
  await scheduleInput.fill('');
  await page.keyboard.type(scheduleAt, { delay: 30 });
  await page.keyboard.press('Enter');
  await page.waitForTimeout(FIELD_SETTLE_MS);
}
|
||||
|
||||
/**
 * Wait for evidence that the publish click succeeded.
 *
 * Two waits are started at once — a URL change matching
 * `sel.publishSuccessUrlPattern` and the appearance of `sel.publishSuccess` —
 * each with a 30 s timeout. Both are awaited, so when only one indicator
 * shows up the function still waits for the other to time out. If neither
 * fires, the page HTML is searched for a success phrase as a last resort.
 *
 * @param page - Playwright page on which publish was just clicked.
 * @returns `{ success: false }` when no indicator is found; otherwise
 *   `success: true` plus `noteId` / `noteUrl` when an ID can be read from
 *   the current URL.
 */
async function waitForPublishResult(
  page: Page,
): Promise<{ success: boolean; noteId?: string; noteUrl?: string }> {
  // Turn a wait into a boolean outcome so a timeout never rejects.
  const settled = (wait: Promise<unknown>): Promise<boolean> =>
    wait.then(
      () => true,
      () => false,
    );

  // Start both waits before the settle delay so they run concurrently.
  const urlWait = settled(page.waitForURL(sel.publishSuccessUrlPattern, { timeout: 30_000 }));
  const elementWait = settled(page.waitForSelector(sel.publishSuccess, { timeout: 30_000 }));

  // Baseline pause for the button click to be processed.
  await page.waitForTimeout(PUBLISH_SETTLE_MS);

  const urlChanged = await urlWait;
  const elementAppeared = await elementWait;

  if (!(urlChanged || elementAppeared)) {
    // Last resort: look for a success phrase anywhere in the page HTML.
    const html = await page.content();
    const mentionsSuccess = ['发布成功', '已发布'].some((phrase) => html.includes(phrase));
    if (!mentionsSuccess) {
      log.warn('No success indicator found after publish');
      return { success: false };
    }
  }

  // The ID is optional; it is only available when the final URL carries it.
  const noteId = extractNoteIdFromUrl(page.url());
  return {
    success: true,
    noteId,
    noteUrl: noteId ? `https://www.xiaohongshu.com/explore/${noteId}` : undefined,
  };
}
|
||||
|
||||
/**
 * Try to read a note ID out of a URL.
 *
 * Looks first at the `noteId` query parameter (e.g.
 * `/publish/success?noteId=xxx`), then at a `/note/<id>` path segment where
 * `<id>` is lowercase hexadecimal. The path match must cover the whole
 * segment, so `/note/abcXYZ` yields `undefined` rather than the truncated
 * prefix `abc`.
 *
 * @param url - URL to inspect.
 * @returns The note ID, or `undefined` when the URL cannot be parsed or
 *   contains no ID.
 */
function extractNoteIdFromUrl(url: string): string | undefined {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    // URL parsing failed — not a problem, noteId is optional.
    return undefined;
  }

  const fromQuery = parsed.searchParams.get('noteId');
  if (fromQuery) return fromQuery;

  // The lookahead anchors the ID to the end of its path segment.
  const pathMatch = /\/note\/([a-f0-9]+)(?=\/|$)/.exec(parsed.pathname);
  return pathMatch?.[1];
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,376 @@
|
||||
import { z } from 'zod';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// MCP tool parameter schemas for Xiaohongshu tools.
|
||||
//
|
||||
// Phase 2 tools (login) have no parameters — their schemas are empty objects.
|
||||
// Phase 3/4 schemas are defined here so that the full tool surface is
|
||||
// established upfront and types can be inferred with z.infer<>.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// -- Phase 2: Login management (3 tools) -----------------------------------
|
||||
|
||||
/** Parameters for `xhs_check_login` (the tool takes none). */
export const CheckLoginSchema = {};

/** Parameters for `xhs_get_login_qrcode` (the tool takes none). */
export const GetLoginQRCodeSchema = {};

/** Parameters for `xhs_delete_cookies` (the tool takes none). */
export const DeleteCookiesSchema = {};
|
||||
|
||||
// -- Phase 3: Content browsing (5 tools) -----------------------------------
|
||||
|
||||
/** Parameters for `xhs_list_feeds`: optional page size and pagination cursor. */
export const ListFeedsSchema = {
  max_count: z.number().int().min(1).max(200).optional().default(20)
    .describe('Maximum number of feeds to return per page (1–200, default 20)'),
  cursor: z.string().optional().describe('Keyset pagination cursor returned by previous call'),
};
|
||||
|
||||
/** Parameters for `xhs_search`: keyword, optional filters, page size and cursor. */
export const SearchSchema = {
  keyword: z.string().describe('Search keyword'),
  filters: z
    .object({
      sort: z.enum(['general', 'time_descending', 'popularity_descending']).optional().describe('Sort order'),
      type: z.enum(['all', 'note', 'video']).optional().describe('Content type filter'),
      time: z.enum(['all', 'day', 'week', 'half_year']).optional().describe('Time range filter'),
    })
    .optional()
    .describe('Optional search filters'),
  max_count: z.number().int().min(1).max(200).optional().default(20)
    .describe('Maximum number of search results to return per page (1–200, default 20)'),
  cursor: z.string().optional().describe('Pagination cursor returned by previous call'),
};
|
||||
|
||||
/**
 * Parameters for `xhs_get_feed_detail`. Every field is optional at the
 * schema level; per the field descriptions, `feed_id` and `xsec_token` are
 * expected when `url` is not supplied.
 */
export const GetFeedDetailSchema = {
  url: z.string().optional().describe('Optional note URL (auto-parses feed_id and xsec_token)'),
  feed_id: z
    .string()
    .optional()
    .describe('Feed (note) ID (required when url not provided)'),
  xsec_token: z
    .string()
    .optional()
    .describe('Security token for the feed (required when url not provided)'),
};
|
||||
|
||||
/** Parameters for `xhs_get_sub_comments`: the parent comment plus paging controls. */
export const GetSubCommentsSchema = {
  feed_id: z.string().describe('Feed (note) ID'),
  xsec_token: z.string().describe('Security token for the feed'),
  comment_id: z.string().describe('Parent comment ID whose sub-comments to load'),
  max_count: z.number().int().min(1).max(200).optional().default(20)
    .describe('Maximum number of sub-comments to return per page (1–200, default 20)'),
  cursor: z.string().optional().describe('Keyset pagination cursor returned by previous call'),
};
|
||||
|
||||
/**
 * Parameters for `xhs_get_user_profile`. Every field is optional at the
 * schema level; per the field descriptions, `user_id` and `xsec_token` are
 * expected when `url` is not supplied.
 */
export const GetUserProfileSchema = {
  url: z.string().optional().describe('Optional user profile URL (auto-parses user_id and xsec_token)'),
  user_id: z
    .string()
    .optional()
    .describe('User ID (required when url not provided)'),
  xsec_token: z
    .string()
    .optional()
    .describe('Security token for the user page (required when url not provided)'),
};
|
||||
|
||||
// -- Phase 4: Content publishing (2 tools) ---------------------------------
|
||||
|
||||
/** Parameters for `xhs_publish_image`: note text, 1–18 images and optional settings. */
export const PublishImageSchema = {
  request_id: z.string().min(1).max(128).optional()
    .describe('Optional idempotency key for publish request'),
  title: z
    .string()
    .min(1)
    .max(20, 'Title must be ≤ 20 characters')
    .describe('Note title (max 20 chars)'),
  content: z
    .string()
    .max(1000, 'Content must be ≤ 1000 characters')
    .describe('Note body text (max 1000 chars)'),
  images: z.array(z.string()).min(1).max(18, 'Maximum 18 images per note')
    .describe('Array of local file paths or HTTP/HTTPS URLs (1–18 images)'),
  tags: z
    .array(z.string())
    .optional()
    .describe('Hashtags to attach'),
  schedule_at: z.string().optional().describe('ISO 8601 datetime for scheduled publishing'),
  is_original: z.boolean().optional().default(false).describe('Mark as original content'),
  visibility: z.enum(['public', 'private', 'friends']).optional().default('public')
    .describe('Visibility setting'),
};
|
||||
|
||||
/** Parameters for `xhs_publish_video`: note text, one video and optional settings. */
export const PublishVideoSchema = {
  request_id: z.string().min(1).max(128).optional()
    .describe('Optional idempotency key for publish request'),
  title: z
    .string()
    .min(1)
    .max(20, 'Title must be ≤ 20 characters')
    .describe('Note title (max 20 chars)'),
  content: z
    .string()
    .max(1000, 'Content must be ≤ 1000 characters')
    .describe('Note body text (max 1000 chars)'),
  video: z.string().describe('Local file path or HTTP/HTTPS URL for the video'),
  tags: z
    .array(z.string())
    .optional()
    .describe('Hashtags to attach'),
  schedule_at: z.string().optional().describe('ISO 8601 datetime for scheduled publishing'),
  visibility: z.enum(['public', 'private', 'friends']).optional().default('public')
    .describe('Visibility setting'),
};
|
||||
|
||||
// -- Phase 4: Interactions (4 tools) ---------------------------------------
|
||||
|
||||
/** Parameters for `xhs_post_comment`: target feed and the comment text. */
export const PostCommentSchema = {
  request_id: z.string().min(1).max(128).optional()
    .describe('Optional idempotency key for comment request'),
  feed_id: z.string().describe('Feed ID to comment on'),
  xsec_token: z.string().describe('Security token for the feed'),
  content: z
    .string()
    .min(1)
    .describe('Comment text'),
};
|
||||
|
||||
/**
 * Parameters for `xhs_reply_comment`: target feed, optional comment / author
 * identifiers, and the reply text.
 */
export const ReplyCommentSchema = {
  request_id: z.string().min(1).max(128).optional()
    .describe('Optional idempotency key for reply request'),
  feed_id: z.string().describe('Feed ID'),
  xsec_token: z.string().describe('Security token for the feed'),
  comment_id: z
    .string()
    .optional()
    .describe('Comment ID to reply to'),
  user_id: z
    .string()
    .optional()
    .describe('User ID of the comment author'),
  content: z
    .string()
    .min(1)
    .describe('Reply text'),
};
|
||||
|
||||
/** Parameters for `xhs_set_like_state`: target feed and the desired like state. */
export const SetLikeStateSchema = {
  feed_id: z
    .string()
    .describe('Feed ID to set like state'),
  xsec_token: z
    .string()
    .describe('Security token for the feed'),
  liked: z
    .boolean()
    .describe('Target like state'),
};
|
||||
|
||||
/**
 * Legacy schema for the REST like-toggle endpoint. It reuses the feed
 * identification fields of `SetLikeStateSchema` and has no target state.
 */
export const LikeSchema = {
  feed_id: SetLikeStateSchema.feed_id,
  xsec_token: SetLikeStateSchema.xsec_token,
};
|
||||
|
||||
/** Parameters for `xhs_list_my_notes`: optional page size and pagination cursor. */
export const ListMyNotesSchema = {
  max_count: z.number().int().min(1).max(200).optional().default(20)
    .describe('Maximum number of notes to return per page (1–200, default 20)'),
  cursor: z.string().optional().describe('Keyset pagination cursor returned by previous call'),
};
|
||||
|
||||
// -- Phase 5: Notifications & automation -----------------------------------
|
||||
|
||||
/** Parameters for `xhs_get_comment_notifications`: optional result limit (capped at 50). */
export const GetCommentNotificationsSchema = {
  max_count: z.number().int().min(1).max(50).optional().default(20)
    .describe('Maximum number of notifications to return (1–50, default 20)'),
};
|
||||
|
||||
/**
 * Parameters for `xhs_reply_notification`. The notification is identified by
 * `fingerprint`, or, per the field descriptions, by `user_id` plus
 * `comment_content` as a fallback.
 */
export const ReplyNotificationSchema = {
  request_id: z.string().min(1).max(128).optional()
    .describe('Optional idempotency key for notification reply'),
  fingerprint: z.string().optional()
    .describe('Optional notification fingerprint from xhs_get_unprocessed_notifications'),
  user_id: z
    .string()
    .optional()
    .describe('User ID of the comment author (fallback when fingerprint is absent)'),
  comment_content: z
    .string()
    .optional()
    .describe('Original comment content to match the notification (fallback when fingerprint is absent)'),
  reply_content: z
    .string()
    .min(1)
    .describe('Reply text to send'),
};
|
||||
|
||||
/**
 * Parameters for `xhs_get_unprocessed_notifications`: paging controls, an
 * optional status filter and a flag to sync before querying.
 */
export const GetUnprocessedNotificationsSchema = {
  max_count: z.number().int().min(1).max(200).optional().default(20)
    .describe('Maximum number of unprocessed notifications to return (1–200, default 20)'),
  cursor: z.string().optional().describe('Keyset pagination cursor returned by previous call'),
  statuses: z.array(z.enum(['new', 'pending', 'failed'])).optional()
    .describe('Statuses to include. Defaults to ["new", "failed"]'),
  sync: z.boolean().optional().default(true)
    .describe('Whether to sync latest notifications from Xiaohongshu before querying local state'),
};
|
||||
|
||||
/** Parameters for `xhs_mark_notification_task`: one task, its new status and an optional note. */
export const MarkNotificationTaskSchema = {
  fingerprint: z
    .string()
    .describe('Notification task fingerprint'),
  status: z.enum(['new', 'pending', 'ignored', 'replied', 'failed'])
    .describe('Target status for this notification task'),
  note: z.string().optional()
    .describe('Optional note/reason (used as reply_content for replied, or error_message for failed/ignored)'),
};
|
||||
|
||||
/** Parameters for `xhs_list_failed_notification_tasks`: optional page size and cursor. */
export const ListFailedNotificationTasksSchema = {
  max_count: z.number().int().min(1).max(200).optional().default(20)
    .describe('Maximum number of failed tasks to return (1–200, default 20)'),
  cursor: z.string().optional().describe('Pagination cursor returned by previous call'),
};
|
||||
|
||||
/** Parameters for `xhs_retry_notification_task`: the task to retry and an optional reply override. */
export const RetryNotificationTaskSchema = {
  request_id: z.string().min(1).max(128).optional()
    .describe('Optional idempotency key for retry request'),
  fingerprint: z
    .string()
    .describe('Notification task fingerprint to retry'),
  reply_content: z.string().min(1).optional()
    .describe('Optional override reply text. If omitted, uses stored reply_content from previous attempt.'),
};
|
||||
|
||||
/** Parameters for `xhs_mark_notification_tasks`: a batch of 1–100 status updates. */
export const MarkNotificationTasksSchema = {
  tasks: z
    .array(
      z.object({
        fingerprint: z.string(),
        status: z.enum(['new', 'pending', 'ignored', 'replied', 'failed']),
        note: z.string().optional(),
      }),
    )
    .min(1)
    .max(100)
    .describe('Batch of task status updates (1–100 items)'),
};
|
||||
|
||||
/**
 * Parameters for `xhs_retry_notification_tasks`: a batch of 1–100 tasks to
 * retry and a flag controlling whether a failure stops the batch.
 */
export const RetryNotificationTasksSchema = {
  request_id: z.string().min(1).max(128).optional()
    .describe('Optional idempotency key for batch retry request'),
  tasks: z
    .array(
      z.object({
        fingerprint: z.string(),
        reply_content: z.string().min(1).optional(),
      }),
    )
    .min(1)
    .max(100)
    .describe('Batch of failed tasks to retry (1–100 items)'),
  continue_on_error: z.boolean().optional().default(true)
    .describe('Continue processing remaining tasks after one task fails'),
};
|
||||
|
||||
/** Parameters for `xhs_set_favorite_state`: target feed and the desired favorite state. */
export const SetFavoriteStateSchema = {
  feed_id: z
    .string()
    .describe('Feed ID to set favorite state'),
  xsec_token: z
    .string()
    .describe('Security token for the feed'),
  favorited: z
    .boolean()
    .describe('Target favorite state'),
};
|
||||
|
||||
/**
 * Legacy schema for the REST favorite-toggle endpoint. It reuses the feed
 * identification fields of `SetFavoriteStateSchema` and has no target state.
 */
export const FavoriteSchema = {
  feed_id: SetFavoriteStateSchema.feed_id,
  xsec_token: SetFavoriteStateSchema.xsec_token,
};
|
||||
@@ -0,0 +1,387 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '@social/core/utils/logger.js';
|
||||
import { extractInitialState, parseCountString, ensureHttps } from './feeds.js';
|
||||
import type { Feed, SearchFilters } from './types.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Base URL of the Xiaohongshu search results page. */
const SEARCH_BASE_URL = 'https://www.xiaohongshu.com/search_result';

/** Module-scoped child logger tagged `xhs-search`. */
const log = logger.child({ module: 'xhs-search' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Sort value mapping
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Translates each public sort option into its URL query parameter value. */
const SORT_PARAM: Record<string, string> = {
  general: '0',
  time_descending: '1',
  popularity_descending: '2',
};
|
||||
|
||||
/** Translates each content-type filter option into its URL query parameter value. */
const TYPE_PARAM: Record<string, string> = {
  all: '0',
  note: '1',
  video: '2',
};
|
||||
|
||||
/** Translates each time-range filter option into its URL query parameter value. */
const TIME_PARAM: Record<string, string> = {
  all: '0',
  day: '1',
  week: '2',
  half_year: '3',
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// __INITIAL_STATE__ raw types for search results
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * One search-result entry as it may appear inside `__INITIAL_STATE__`.
 *
 * Every field is optional, and several are declared in both camelCase and
 * snake_case because the parser accepts either spelling. Most fields may
 * also be supplied through the nested `noteCard`.
 */
interface RawSearchFeedItem {
  // Identity
  id?: string;
  noteId?: string;
  note_id?: string;
  xsecToken?: string;
  xsec_token?: string;

  // Text content
  displayTitle?: string;
  display_title?: string;
  title?: string;
  name?: string;
  desc?: string;
  description?: string;

  // Nested card that can carry the same information
  noteCard?: RawSearchNoteCard;

  // Note kind
  type?: string;
  model_type?: string;

  // Media and author
  cover?: RawSearchImage;
  user?: RawSearchUser;

  // Engagement
  interactInfo?: RawSearchInteractInfo;
  interact_info?: RawSearchInteractInfo;
  likedCount?: string;
  liked_count?: string;
}
|
||||
|
||||
/**
 * Nested `noteCard` payload of a raw search item. The parser reads these
 * fields as fallbacks when the top-level item does not provide them.
 */
interface RawSearchNoteCard {
  // Identity
  noteId?: string;

  // Text content
  displayTitle?: string;
  display_title?: string;
  title?: string;
  desc?: string;

  // Note kind
  type?: string;

  // Media and author
  cover?: RawSearchImage;
  user?: RawSearchUser;

  // Engagement
  interactInfo?: RawSearchInteractInfo;
  interact_info?: RawSearchInteractInfo;

  // Security token
  xsecToken?: string;
  xsec_token?: string;
}
|
||||
|
||||
/**
 * Raw cover-image object of a search item. The URL may be present under any
 * of the direct fields or as the first entry of an info list.
 */
interface RawSearchImage {
  // Direct URL fields
  url?: string;
  urlPre?: string;
  urlDefault?: string;
  url_pre?: string;
  url_default?: string;

  // List form (camelCase / snake_case)
  infoList?: Array<{ url?: string }>;
  info_list?: Array<{ url?: string }>;
}
|
||||
|
||||
/**
 * Raw author object of a search item; each attribute is declared under the
 * alternative spellings the parser accepts.
 */
interface RawSearchUser {
  // User ID
  userId?: string;
  user_id?: string;

  // Display name
  nickname?: string;
  nick_name?: string;
  nickName?: string;

  // Avatar URL
  avatar?: string;
  avatarUrl?: string;
  avatar_url?: string;
}
|
||||
|
||||
/**
 * Raw interaction counters of a search item. The like count is a string and
 * may appear under any of these four names.
 */
interface RawSearchInteractInfo {
  likedCount?: string;
  liked_count?: string;
  likeCount?: string;
  like_count?: string;
}
|
||||
|
||||
/**
 * The parts of `__INITIAL_STATE__` that are known to hold search results.
 *
 * The index signature lets the parser also walk unknown top-level keys when
 * none of the named locations yields any feeds.
 */
interface SearchInitialState {
  searchNotes?: { feeds?: RawSearchFeedItem[] };
  searchResult?: {
    notes?: RawSearchFeedItem[];
    feeds?: RawSearchFeedItem[];
  };
  search?: {
    feeds?: RawSearchFeedItem[];
    notes?: RawSearchFeedItem[];
  };
  [key: string]: unknown;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// searchFeeds
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Run a keyword search on Xiaohongshu and return the matching notes.
 *
 * Results are taken from the page's `__INITIAL_STATE__` when it yields at
 * least one feed; otherwise the rendered DOM is scraped instead.
 *
 * @param page - A Playwright Page managed by BrowserManager.
 * @param keyword - The search term.
 * @param filters - Optional sorting, type, and time range filters.
 * @returns The feeds found for the search (possibly empty).
 */
export async function searchFeeds(
  page: Page,
  keyword: string,
  filters?: SearchFilters,
): Promise<Feed[]> {
  const url = buildSearchUrl(keyword, filters);
  log.debug({ keyword, filters, url }, 'Navigating to search page');

  await page.goto(url, { waitUntil: 'domcontentloaded' });
  // Fixed pause that gives the page time to render search results.
  await page.waitForTimeout(2000);

  // Strategy 1: read the results out of __INITIAL_STATE__.
  const state = (await extractInitialState(page)) as SearchInitialState | null;
  if (state) {
    const fromState = parseSearchFeedsFromState(state);
    if (fromState.length > 0) {
      log.info({ keyword, count: fromState.length }, 'Extracted search results from __INITIAL_STATE__');
      return fromState;
    }
    log.debug('__INITIAL_STATE__ found but no search feeds extracted, falling back to DOM');
  }

  // Strategy 2: scrape the rendered result cards.
  log.debug('Falling back to DOM scraping for search results');
  const fromDom = await scrapeSearchResultsFromDom(page);
  log.info({ keyword, count: fromDom.length }, 'Extracted search results from DOM');
  return fromDom;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// URL construction
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Assemble the search page URL for a keyword.
 *
 * `keyword` is always sent. `sort`, `type` and `time` are sent only when the
 * corresponding filter is given and has an entry in its translation table.
 *
 * @param keyword - The search term.
 * @param filters - Optional search filters.
 * @returns The search URL including its query string.
 */
function buildSearchUrl(keyword: string, filters?: SearchFilters): string {
  const query = new URLSearchParams({ keyword });

  const optionalParams: Array<[string, string | undefined]> = [
    ['sort', filters?.sort ? SORT_PARAM[filters.sort] : undefined],
    ['type', filters?.type ? TYPE_PARAM[filters.type] : undefined],
    ['time', filters?.time ? TIME_PARAM[filters.time] : undefined],
  ];
  for (const [name, value] of optionalParams) {
    if (value) {
      query.set(name, value);
    }
  }

  return `${SEARCH_BASE_URL}?${query.toString()}`;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// __INITIAL_STATE__ parsing for search results
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Parse search results from the `__INITIAL_STATE__` data.
 *
 * The known locations (`searchNotes.feeds`, `searchResult.notes`,
 * `searchResult.feeds`, `search.feeds`, `search.notes`) are tried first, in
 * that order. If the first defined one is empty, or none is defined, every
 * top-level object in the state is scanned for a `feeds`, `notes` or `items`
 * array, and the first array yielding at least one feed wins.
 *
 * Array entries that are not objects (for example `null`) are skipped rather
 * than passed to the item parser, because the heuristic scan can reach
 * arrays that do not hold search items at all.
 *
 * @param state - The page's initial state object.
 * @returns The parsed feeds, or an empty array when nothing usable is found.
 */
function parseSearchFeedsFromState(state: SearchInitialState): Feed[] {
  // Shared conversion: drop non-object entries, parse, drop items without an ID.
  const toFeeds = (items: readonly unknown[]): Feed[] =>
    items
      .filter((item): item is RawSearchFeedItem => typeof item === 'object' && item !== null)
      .map((item) => parseRawSearchItem(item))
      .filter((feed): feed is Feed => feed !== null);

  // Try multiple known locations where search data may live.
  const known: unknown =
    state.searchNotes?.feeds ??
    state.searchResult?.notes ??
    state.searchResult?.feeds ??
    state.search?.feeds ??
    state.search?.notes ??
    [];

  // Array.isArray also guards against a non-array value at a known location.
  if (Array.isArray(known) && known.length > 0) {
    return toFeeds(known);
  }

  // Walk top-level keys looking for an array that resembles feeds.
  for (const value of Object.values(state)) {
    if (!value || typeof value !== 'object' || Array.isArray(value)) continue;

    const container = value as Record<string, unknown>;
    for (const candidate of ['feeds', 'notes', 'items']) {
      const list = container[candidate];
      if (!Array.isArray(list)) continue;

      const parsed = toFeeds(list);
      if (parsed.length > 0) return parsed;
    }
  }

  return [];
}
|
||||
|
||||
/**
 * Turn one raw search item into a `Feed`.
 *
 * For every attribute the top-level spellings are preferred and the nested
 * `noteCard` is used as a fallback. Missing text fields become empty
 * strings, and a missing like count is parsed from `'0'`.
 *
 * @param raw - The raw item taken from the initial state.
 * @returns The structured feed, or `null` when no ID can be determined.
 */
function parseRawSearchItem(raw: RawSearchFeedItem): Feed | null {
  const { noteCard } = raw;

  // The note counts as a video when its type string mentions "video".
  const kind = raw.type ?? raw.model_type ?? noteCard?.type ?? '';
  const author = raw.user ?? noteCard?.user;
  const interact =
    raw.interactInfo ?? raw.interact_info ?? noteCard?.interactInfo ?? noteCard?.interact_info;

  const feed: Feed = {
    id: raw.id ?? raw.noteId ?? raw.note_id ?? noteCard?.noteId ?? '',
    xsecToken:
      raw.xsecToken ?? raw.xsec_token ?? noteCard?.xsecToken ?? noteCard?.xsec_token ?? '',
    title:
      raw.displayTitle ??
      raw.display_title ??
      raw.title ??
      raw.name ??
      noteCard?.displayTitle ??
      noteCard?.display_title ??
      noteCard?.title ??
      '',
    description: raw.desc ?? raw.description ?? noteCard?.desc ?? '',
    type: kind.toLowerCase().includes('video') ? 'video' : 'normal',
    coverUrl: extractSearchImageUrl(raw.cover ?? noteCard?.cover),
    likeCount: parseCountString(
      interact?.likedCount ??
        interact?.liked_count ??
        interact?.likeCount ??
        interact?.like_count ??
        raw.likedCount ??
        raw.liked_count ??
        '0',
    ),
    user: {
      id: author?.userId ?? author?.user_id ?? '',
      nickname: author?.nickname ?? author?.nick_name ?? author?.nickName ?? '',
      avatar: author?.avatar ?? author?.avatarUrl ?? author?.avatar_url ?? '',
    },
  };

  // An item without an ID is not returned.
  return feed.id ? feed : null;
}
|
||||
|
||||
/**
 * Pick the cover URL out of a raw search cover object.
 *
 * The direct fields are tried in the order `url`, `urlPre`, `urlDefault`,
 * `url_pre`, `url_default`; after that the first entry of `infoList`
 * (or `info_list`) is used. The chosen URL is passed through `ensureHttps`.
 *
 * @param raw - The raw cover object, if any.
 * @returns The URL, or an empty string when none is available.
 */
function extractSearchImageUrl(raw: RawSearchImage | undefined): string {
  if (!raw) return '';

  // `||` rather than `??`: an empty string falls through to the next field.
  const direct = raw.url || raw.urlPre || raw.urlDefault || raw.url_pre || raw.url_default;
  if (direct) return ensureHttps(direct);

  const firstListed = (raw.infoList ?? raw.info_list)?.[0]?.url;
  return firstListed ? ensureHttps(firstListed) : '';
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// DOM scraping fallback — uses Playwright Node-side API exclusively
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Scrape search results from the rendered page using Playwright's Node-side
 * API (element handles and `$eval`) to avoid needing DOM lib types.
 *
 * Waits up to 10 seconds for the first result card; a timeout is tolerated
 * and simply leads to an empty result. Cards whose cover link carries no
 * recognisable note ID are skipped. Missing sub-elements produce empty
 * strings, and a missing like count is parsed from `'0'`.
 *
 * The `xsec_token` read from the cover link is percent-decoded. This keeps
 * it consistent with tokens read via `URLSearchParams` elsewhere and avoids
 * double-encoding when a caller later applies `encodeURIComponent`.
 *
 * @param page - The page currently showing the search results.
 * @returns The feeds scraped from the result cards.
 */
async function scrapeSearchResultsFromDom(page: Page): Promise<Feed[]> {
  // Hoisted so the selector and patterns are not rebuilt for every card.
  const noteItemSelector = '.feeds-container .note-item';
  const noteIdPattern = /\/(?:explore|search_result)\/([a-f0-9]+)/;
  const xsecTokenPattern = /xsec_token=([^&]+)/;
  const authorIdPattern = /\/user\/profile\/([a-f0-9]+)/;

  // Wait for the search result note items to appear.
  await page.waitForSelector(noteItemSelector, { timeout: 10_000 }).catch(() => null);

  const cardElements = await page.$$(noteItemSelector);
  const feeds: Feed[] = [];

  for (const card of cardElements) {
    try {
      const href = await card
        .$eval('a.cover', (el) => el.getAttribute('href') ?? '')
        .catch(() => '');

      const id = noteIdPattern.exec(href)?.[1] ?? '';
      if (!id) continue;

      // The href holds the token in URL form; decode it, keeping the raw
      // text if it contains a malformed escape sequence.
      const encodedToken = xsecTokenPattern.exec(href)?.[1] ?? '';
      let xsecToken = encodedToken;
      try {
        xsecToken = decodeURIComponent(encodedToken);
      } catch {
        xsecToken = encodedToken;
      }

      const coverUrl = await card
        .$eval('a.cover img', (el) => el.getAttribute('src') ?? '')
        .catch(() => '');

      const title = await card
        .$eval('.footer .title', (el) => el.textContent?.trim() ?? '')
        .catch(() => '');

      const nickname = await card
        .$eval('.footer .author-wrapper .name', (el) => el.textContent?.trim() ?? '')
        .catch(() => '');

      const avatar = await card
        .$eval('.footer .author-wrapper .author-head img', (el) => el.getAttribute('src') ?? '')
        .catch(() => '');

      const authorHref = await card
        .$eval('.footer .author-wrapper a', (el) => el.getAttribute('href') ?? '')
        .catch(() => '');
      const userId = authorIdPattern.exec(authorHref)?.[1] ?? '';

      const likeText = await card
        .$eval('.footer .like-wrapper .count', (el) => el.textContent?.trim() ?? '0')
        .catch(() => '0');
      const likeCount = parseCountString(likeText);

      // A play icon on the card marks the note as a video.
      const hasVideoIcon = await card
        .$('.play-icon')
        .then((el) => el !== null)
        .catch(() => false);

      feeds.push({
        id,
        xsecToken,
        title,
        description: '',
        type: hasVideoIcon ? 'video' : 'normal',
        coverUrl,
        likeCount,
        user: { id: userId, nickname, avatar },
      });
    } catch {
      // Best effort: a card that cannot be read is skipped, not fatal.
      continue;
    }
  }

  return feeds;
}
|
||||
@@ -0,0 +1,233 @@
|
||||
// ---------------------------------------------------------------------------
|
||||
// CSS Selectors — centralised so that UI changes only require edits here.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * All CSS selectors (and one URL pattern) used to drive the Xiaohongshu web
 * UI, grouped by page or feature. Declared `as const`, so every value keeps
 * its literal type.
 */
export const XHS_SELECTORS = {
  // -- Login ----------------------------------------------------------------

  login: {
    /** QR code image on the login modal (auto-appears after a few seconds). */
    qrCodeImage: 'img.qrcode-img',
    /** Element present only when the user is logged in (sidebar channel link). */
    loggedInIndicator: '.user .link-wrapper .channel',
    /** The "login" button that opens the QR code modal (if not already shown). */
    loginButton: '.login-btn',
    /** Logged-in user's avatar image in the sidebar. */
    userAvatar: '.user .avatar img',
    /** Logged-in user's profile link in the sidebar (href contains userId). */
    userLink: '.user .link-wrapper a',
  },

  // -- Feed list ------------------------------------------------------------

  feed: {
    /** Container for each feed card on the explore page. */
    feedCard: '.note-item',
    /** The cover image within a feed card. */
    coverImage: '.note-item a.cover img',
    /** The title/footer within a feed card. */
    footerTitle: '.note-item .footer .title',
    /** Author name within a feed card. */
    authorName: '.note-item .footer .author-wrapper .name',
    /** Author avatar within a feed card. */
    authorAvatar: '.note-item .footer .author-wrapper .author-head img',
    /** Like count within a feed card. */
    likeCount: '.note-item .footer .like-wrapper .count',
  },

  // -- Search ---------------------------------------------------------------

  search: {
    /** Search result container. */
    resultContainer: '#global-search-result-container',
    /** Individual search result note items. */
    noteItem: '.feeds-container .note-item',
    /** Search result cover image. */
    coverImage: '.feeds-container .note-item a.cover img',
    /** Search result title. */
    title: '.feeds-container .note-item .footer .title',
    /** Search result author name. */
    authorName: '.feeds-container .note-item .footer .author-wrapper .name',
    /** Search result author avatar. */
    authorAvatar: '.feeds-container .note-item .footer .author-wrapper .author-head img',
    /** Search result like count. */
    likeCount: '.feeds-container .note-item .footer .like-wrapper .count',
  },

  // -- Feed detail ----------------------------------------------------------

  feedDetail: {
    /** The main content container for a note detail page. */
    noteContainer: '#noteContainer',
    /** The title of the note. */
    title: '#detail-title',
    /** The description / body content of the note. */
    description: '#detail-desc',
    /** Individual images in an image note. */
    images: '.note-image-list .note-image img',
    /** The single hero image (some notes use this instead of a list). */
    heroImage: '.note-hero img',
    /** Video player element. */
    video: '#videoplayer video',
    /** Video player source. */
    videoSource: '#videoplayer video source',
    /** Tag links within the note body. */
    tags: '#detail-desc a.tag',
    /** Like count. */
    likeCount: '.engage-bar .like-wrapper .count',
    /** Collect (favorite) count. */
    collectCount: '.engage-bar .collect-wrapper .count',
    /** Comment count. */
    commentCount: '.engage-bar .chat-wrapper .count',
    // NOTE(review): a "Share count" doc comment stood here without any
    // selector; no share-count selector is defined in this group.
    /** Publish / create time text. */
    createTime: '.note-scroller .bottom-container .date',
    /** IP location. */
    ipLocation: '.note-scroller .bottom-container .ip-location',
    /** Author nickname on the detail page. */
    authorName: '.author-container .info .name',
    /** Author avatar on the detail page. */
    authorAvatar: '.author-container .info .avatar img',
    /** Author user ID link. */
    authorLink: '.author-container .info a',
    /** Comment list container. */
    commentListContainer: '.comments-container .list-container',
    /** Individual top-level comment items. */
    commentItem: '.comments-container .list-container > .parent-comment > .comment-item',
    /** Parent comment content text. */
    commentContent: '.content',
    /** Comment author name. */
    commentAuthor: '.author .name',
    /** Comment author avatar. */
    commentAvatar: '.avatar img.avatar-item',
    /** Comment like count. */
    commentLikeCount: '.like .count',
    /** Comment publish time. */
    commentTime: '.date',
    /** Comment IP location. */
    commentIpLocation: '.ip-location',
    /** Sub-comment (reply) items. */
    subCommentItem: '.sub-comment-list .sub-comment-item',
    /** "Show more comments" button. */
    showMoreComments: '.comments-container .show-more',
    /** "Load more replies" button within a comment thread. */
    loadMoreReplies: '.sub-comment-list .show-more',
    /** Sub-comment count text element (e.g. "展开 X 条回复"). */
    subCommentCountText: '.sub-comment-list .show-more, .reply-container .show-more',
  },

  // -- User profile ---------------------------------------------------------

  userProfile: {
    /** Profile header container. */
    headerContainer: '.user-info',
    /** User nickname. */
    nickname: '.user-info .user-name',
    /** User avatar image (the img itself carries class user-image). */
    avatar: '.user-info img.user-image',
    /** User bio / description text. */
    description: '.user-info .user-desc',
    /** User gender icon or text. */
    gender: '.user-info .gender-icon',
    /** IP location. */
    ipLocation: '.user-info .user-ip',
    /** Follower / following / interaction count elements. */
    followCount: '.user-info .user-interactions > div',
    /** Individual feed items on the user profile. */
    feedItem: '.feeds-container .note-item',
  },

  // -- Phase 4: Publish -----------------------------------------------------

  publish: {
    /** The file input element for uploading images on the creator publish page. */
    imageFileInput: 'input[type="file"]',
    /** Title input field on the publish form. */
    titleInput: 'input.d-text[placeholder*="标题"]',
    /** Content / body editor area on the publish form (contenteditable ProseMirror). */
    contentEditor: '.tiptap.ProseMirror',
    /** The tag / topic button that opens the topic input. */
    tagButton: 'button.contentBtn.topic-btn',
    /** Tag / topic input field for typing hashtags. */
    tagInput: 'button.contentBtn.topic-btn input',
    /** Topic / hashtag suggestion dropdown item. */
    tagSuggestionItem: '.publish-topic-item, .topic-item',
    /** "Publish" / submit button. */
    publishButton: 'button.d-button:has-text("发布")',
    /** Schedule / timing selector button. */
    scheduleButton: '.timing-btn, button:has-text("定时")',
    /** Schedule date/time input field. */
    scheduleInput: '.timing-input input, .schedule-input input',
    /** Original content declaration checkbox. */
    originalCheckbox: '.original-checkbox input, input[type="checkbox"][name="original"]',
    /** Visibility / permission setting button. */
    visibilityButton: '.permission-btn, button:has-text("可见")',
    /** Visibility option for public. */
    visibilityPublic: '.permission-option:has-text("公开"), .visibility-option:has-text("公开")',
    /** Visibility option for private. */
    visibilityPrivate: '.permission-option:has-text("私密"), .visibility-option:has-text("私密")',
    /** Visibility option for friends only. */
    visibilityFriends: '.permission-option:has-text("好友"), .visibility-option:has-text("好友")',
    /** Upload complete indicator (images uploaded and thumbnails visible). */
    uploadedImageItem: '.img-upload-area .img-container',
    /** Video upload complete indicator (video thumbnail visible). */
    uploadedVideoItem: '.upload-video video, .video-item video, .video-container video',
    /** Success indicator shown after publish completes. */
    publishSuccess: '.success-panel, .publish-success, .note-success',
    /** URL in the address bar after successful publish (used as a fallback check). */
    publishSuccessUrlPattern: /\/publish\/success/,
  },

  // -- Phase 4: Comment / Reply ---------------------------------------------

  comment: {
    /** The comment input field / textarea on the feed detail page. */
    commentInput: '#content-textarea',
    /** Alternative comment input (contenteditable div). */
    commentInputAlt: '[contenteditable][data-placeholder]',
    /** Comment submit / send button. */
    commentSubmitButton: '.comment-submit, button.submit, .btn-send',
    /** Parent comment element (used to find specific comment by ID). */
    commentItem: '.comment-item, .note-comment-item, [id^="comment-"]',
    /** Reply button on an individual comment. */
    commentReplyButton: '.reply-btn, .comment-reply',
    /** Reply input that appears after clicking reply. */
    replyInput: '.reply-input textarea, .reply-content [contenteditable], .reply-area textarea',
  },

  // -- Phase 5: Notification ------------------------------------------------

  notification: {
    /** Each notification item container. */
    container: '.container',
    /** User avatar link (href contains userId + xsecToken). */
    userAvatar: 'a.user-avatar',
    /** User name link. */
    userName: '.user-info a',
    /** Interaction type hint (e.g. "评论了你的笔记"). */
    interactionHint: '.interaction-hint span:first-child',
    /** Notification time. */
    interactionTime: '.interaction-time',
    /** Comment content text. */
    interactionContent: '.interaction-content',
    /** Note thumbnail image (parent link href contains feedId + xsecToken). */
    extraImage: '.extra img',
    /** Reply button to expand inline reply. */
    replyButton: '.action-reply',
    /** Reply textarea that appears after clicking reply. */
    replyInput: 'textarea.comment-input',
    /** Reply submit button. */
    replySubmit: 'button.submit',
    /** Unread badge on the explore page bottom menu. */
    unreadBadge: '#global > div.main-container > div.bottom-menu > div > li.link-wrapper.bottom-channel > a > div > div',
  },

  // -- Phase 4: Interaction (Like / Favorite) --------------------------------

  interaction: {
    /** Like button on the feed detail page. */
    likeButton: '.engage-bar-style .like-wrapper',
    /** Like button in active/liked state. */
    likeButtonActive: '.engage-bar-style .like-wrapper.like-active',
    /** Like count element next to the like button. */
    likeCount: '.engage-bar .like-wrapper .count',
    /** Favorite / collect button on the feed detail page. */
    favoriteButton: '.engage-bar-style .collect-wrapper',
    /** Favorite button in active/favorited state. */
    favoriteButtonActive: '.engage-bar-style .collect-wrapper.collect-active',
    /** Favorite count element next to the favorite button. */
    favoriteCount: '.engage-bar .collect-wrapper .count',
    /** Container for the interaction bar at the bottom of a feed detail. */
    interactionBar: '.interact-container, .engage-bar',
  },
} as const;
|
||||
@@ -0,0 +1,95 @@
|
||||
/**
 * Caller-supplied description of a feed: explicit `feed_id` / `xsec_token`
 * values, a `url` to extract them from, or both.
 */
interface FeedTargetInput {
  /** Explicit feed ID; preferred over an ID found in `url`. */
  feed_id?: string;
  /** Explicit security token; preferred over a token found in `url`. */
  xsec_token?: string;
  /** URL that may supply whichever value is missing. */
  url?: string;
}
|
||||
|
||||
/**
 * Caller-supplied description of a user: explicit `user_id` / `xsec_token`
 * values, a `url` to extract them from, or both.
 */
interface UserTargetInput {
  /** Explicit user ID; preferred over an ID found in `url`. */
  user_id?: string;
  /** Explicit security token; preferred over a token found in `url`. */
  xsec_token?: string;
  /** URL that may supply whichever value is missing. */
  url?: string;
}
|
||||
|
||||
/** Result of resolving a feed target: both values are present and non-empty. */
interface FeedTargetResolved {
  /** The feed ID. */
  feedId: string;
  /** The security token for the feed. */
  xsecToken: string;
}
|
||||
|
||||
/** Result of resolving a user target: both values are present and non-empty. */
interface UserTargetResolved {
  /** The user ID. */
  userId: string;
  /** The security token for the user. */
  xsecToken: string;
}
|
||||
|
||||
/**
 * Parse user-supplied text into a `URL`.
 *
 * Surrounding whitespace is ignored. Three input forms are accepted:
 * an absolute `http(s)://` URL, a path starting with `/` (resolved against
 * `https://www.xiaohongshu.com`), and anything else, which gets `https://`
 * prepended.
 *
 * @param rawUrl - The text to parse.
 * @returns The parsed URL.
 * @throws Error when the input is empty after trimming; the `URL`
 *   constructor throws when the resulting text is not a valid URL.
 */
function parseXhsUrl(rawUrl: string): URL {
  const candidate = rawUrl.trim();
  if (candidate.length === 0) {
    throw new Error('url cannot be empty');
  }

  let absolute: string;
  if (/^https?:\/\//i.test(candidate)) {
    absolute = candidate;
  } else if (candidate.startsWith('/')) {
    absolute = `https://www.xiaohongshu.com${candidate}`;
  } else {
    absolute = `https://${candidate}`;
  }

  return new URL(absolute);
}
|
||||
|
||||
/**
 * Extract a feed (note) ID from a URL path.
 *
 * The path forms are tried in priority order: `/explore/<id>`,
 * `/discovery/item/<id>`, `/note/<id>`, and finally `/search_result/<id>`,
 * the form used by links on search result cards.
 *
 * @param pathname - The path component of a URL.
 * @returns The ID, or `undefined` when no pattern matches.
 */
function extractFeedIdFromPath(pathname: string): string | undefined {
  const patterns = [
    /\/explore\/([a-zA-Z0-9_-]+)/,
    /\/discovery\/item\/([a-zA-Z0-9_-]+)/,
    /\/note\/([a-zA-Z0-9_-]+)/,
    // Listed last so the priority among the earlier forms is unchanged.
    /\/search_result\/([a-zA-Z0-9_-]+)/,
  ];

  for (const pattern of patterns) {
    const id = pattern.exec(pathname)?.[1];
    if (id) return id;
  }
  return undefined;
}
|
||||
|
||||
/**
 * Extract a user ID from a `/user/profile/<id>` URL path.
 *
 * @param pathname - The path component of a URL.
 * @returns The ID, or `undefined` when the path has no such segment.
 */
function extractUserIdFromPath(pathname: string): string | undefined {
  const found = /\/user\/profile\/([a-zA-Z0-9_-]+)/.exec(pathname);
  return found === null ? undefined : found[1];
}
|
||||
|
||||
/**
 * Read the security token from a URL's query string.
 *
 * `xsec_token` is checked first, then `xsecToken`. A parameter that is
 * present but empty counts as found, so it yields an empty string.
 *
 * @param url - The parsed URL.
 * @returns The token, or `undefined` when neither parameter is present.
 */
function extractXsecToken(url: URL): string | undefined {
  for (const name of ['xsec_token', 'xsecToken']) {
    const value = url.searchParams.get(name);
    if (value !== null) return value;
  }
  return undefined;
}
|
||||
|
||||
/**
 * Resolve a feed ID and security token from explicit values and/or a URL.
 *
 * Explicit values (trimmed) take precedence. The URL is parsed only when at
 * least one value is still missing, so a blank or malformed `url` cannot
 * fail a request that already supplies both values.
 *
 * @param input - Explicit `feed_id` / `xsec_token` and/or a `url`.
 * @returns The resolved, non-empty feed ID and token.
 * @throws Error when either value cannot be determined, or when the URL has
 *   to be consulted and cannot be parsed.
 */
export function resolveFeedTarget(input: FeedTargetInput): FeedTargetResolved {
  let feedId = input.feed_id?.trim();
  let xsecToken = input.xsec_token?.trim();

  // Consult the URL only for values the caller did not supply.
  if (input.url && (!feedId || !xsecToken)) {
    const parsed = parseXhsUrl(input.url);
    feedId = feedId || extractFeedIdFromPath(parsed.pathname);
    xsecToken = xsecToken || extractXsecToken(parsed);
  }

  if (!feedId || !xsecToken) {
    throw new Error('xhs_get_feed_detail requires either url with feed_id/xsec_token, or both feed_id and xsec_token');
  }

  return { feedId, xsecToken };
}
|
||||
|
||||
/**
 * Resolve a user ID and security token from explicit values and/or a URL.
 *
 * Explicit values (trimmed) take precedence. The URL is parsed only when at
 * least one value is still missing, so a blank or malformed `url` cannot
 * fail a request that already supplies both values.
 *
 * @param input - Explicit `user_id` / `xsec_token` and/or a `url`.
 * @returns The resolved, non-empty user ID and token.
 * @throws Error when either value cannot be determined, or when the URL has
 *   to be consulted and cannot be parsed.
 */
export function resolveUserTarget(input: UserTargetInput): UserTargetResolved {
  let userId = input.user_id?.trim();
  let xsecToken = input.xsec_token?.trim();

  // Consult the URL only for values the caller did not supply.
  if (input.url && (!userId || !xsecToken)) {
    const parsed = parseXhsUrl(input.url);
    userId = userId || extractUserIdFromPath(parsed.pathname);
    xsecToken = xsecToken || extractXsecToken(parsed);
  }

  if (!userId || !xsecToken) {
    throw new Error('xhs_get_user_profile requires either url with user_id/xsec_token, or both user_id and xsec_token');
  }

  return { userId, xsecToken };
}
|
||||
@@ -0,0 +1,115 @@
|
||||
// ---------------------------------------------------------------------------
|
||||
// Xiaohongshu domain types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// -- Login -----------------------------------------------------------------
|
||||
|
||||
/** Login state of the browser session, with optional account details. */
export interface LoginStatus {
  /** Whether the session is logged in. */
  loggedIn: boolean;
  /** Account name, when available. */
  username?: string;
  /** Avatar URL, when available. */
  avatar?: string;
  /** Account user ID, when available. */
  userId?: string;
}
|
||||
|
||||
/** Outcome of requesting a login QR code. */
export interface QRCodeResult {
  /** Base64 data URI of the QR code image. */
  qrcodeData: string;
  /** Whether the user was already logged in (no QR code needed). */
  alreadyLoggedIn: boolean;
  /** Human-readable timeout hint (e.g. "4m"). */
  timeout: string;
}
|
||||
|
||||
// -- Feed -----------------------------------------------------------------
|
||||
|
||||
/** Author information attached to a feed. */
export interface FeedUser {
  /** User ID. */
  id: string;
  /** Display name. */
  nickname: string;
  /** Avatar image URL. */
  avatar: string;
}
|
||||
|
||||
/** Summary of a note as shown in a feed or search result list. */
export interface Feed {
  /** Note ID. */
  id: string;
  /** Security token associated with the note. */
  xsecToken: string;
  /** Note title. */
  title: string;
  /** Note description text. */
  description: string;
  /** Note kind: a video note or a normal note. */
  type: 'normal' | 'video';
  /** Cover image URL. */
  coverUrl: string;
  /** Number of likes. */
  likeCount: number;
  /** Author of the note. */
  user: FeedUser;
}
|
||||
|
||||
// -- Feed Detail ----------------------------------------------------------
|
||||
|
||||
/** Full details of a single note, including its comments. */
export interface FeedDetail {
  // Identity
  id: string;
  xsecToken: string;

  // Content
  title: string;
  description: string;
  type: 'normal' | 'video';
  images: string[];
  /** Only declared as optional; present when a video URL is available. */
  videoUrl?: string;
  tags: string[];

  // Engagement counters and the viewer's own state
  likeCount: number;
  collectCount: number;
  commentCount: number;
  isLiked: boolean;
  isFavorited: boolean;

  // Publication metadata
  createTime: string;
  lastUpdateTime: string;
  ipLocation: string;

  // Author and comment thread
  user: FeedUser;
  comments: Comment[];
}
|
||||
|
||||
// -- Comment --------------------------------------------------------------
|
||||
|
||||
/**
 * A comment on a note. Replies use the same shape and are nested in
 * `subComments`.
 */
export interface Comment {
  /** Comment ID. */
  id: string;

  // Author
  userId: string;
  nickname: string;
  avatar: string;

  // Body and metadata
  content: string;
  likeCount: number;
  createTime: string;
  ipLocation: string;

  // Replies
  subCommentCount: number;
  subComments: Comment[];
}
|
||||
|
||||
// -- User Profile ---------------------------------------------------------
|
||||
|
||||
/** A user's public profile together with the notes listed on it. */
export interface UserProfile {
  // Basic information
  id: string;
  nickname: string;
  avatar: string;
  description: string;
  gender: string;
  ipLocation: string;

  // Counters
  follows: number;
  fans: number;
  interaction: number;

  // Notes listed on the profile
  feeds: Feed[];
}
|
||||
|
||||
// -- Comment Notification -------------------------------------------------
|
||||
|
||||
/** One entry of the comment notification list. */
export interface CommentNotification {
  // User who triggered the notification
  userId: string;
  nickname: string;
  avatar: string;

  // What happened
  content: string;
  type: string;
  time: string;

  // Note the notification refers to
  feedId: string;
  xsecToken: string;
  noteImage: string;
}
|
||||
|
||||
// -- Search Filters -------------------------------------------------------
|
||||
|
||||
/** Optional filters for a note search; each one may be omitted. */
export interface SearchFilters {
  /** Result ordering. */
  sort?: 'general' | 'time_descending' | 'popularity_descending';
  /** Note type restriction. */
  type?: 'all' | 'note' | 'video';
  /** Publication time range. */
  time?: 'all' | 'day' | 'week' | 'half_year';
}
|
||||
@@ -0,0 +1,425 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '@social/core/utils/logger.js';
|
||||
import { XHS_SELECTORS } from './selectors.js';
|
||||
import { extractInitialState, parseCountString, ensureHttps } from './feeds.js';
|
||||
import type { UserProfile, Feed } from './types.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Base URL of user profile pages; the user ID is appended as a path segment. */
const USER_PROFILE_BASE_URL = 'https://www.xiaohongshu.com/user/profile';
|
||||
|
||||
/** Shorthand for the user-profile selector group. */
const SEL = XHS_SELECTORS.userProfile;
|
||||
|
||||
/** Child logger for this file, created with `module: 'xhs-user-profile'`. */
const log = logger.child({ module: 'xhs-user-profile' });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// __INITIAL_STATE__ raw types for user profile
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * The parts of `__INITIAL_STATE__` declared as possible locations of user
 * profile data. The index signature allows access to any other top-level key.
 */
interface RawProfileState {
  user?: {
    userPageData?: RawUserPageData;
    userInfo?: RawUserInfo;
  };
  userProfile?: {
    userInfo?: RawUserInfo;
    notes?: RawProfileNote[];
  };
  [key: string]: unknown;
}
|
||||
|
||||
/** Raw `userPageData` object: basic info, interaction counters and notes. */
interface RawUserPageData {
  basicInfo?: RawUserInfo;
  interactions?: RawInteractions;
  notes?: RawProfileNote[];

  // Note count (camelCase / snake_case), as a number or a string
  noteCount?: number | string;
  note_count?: number | string;
}
|
||||
|
||||
/**
 * Raw user information object. Every field is optional, and many are
 * declared under several alternative spellings.
 */
interface RawUserInfo {
  // User ID
  userId?: string;
  user_id?: string;

  // Display name
  nickname?: string;
  nick_name?: string;
  nickName?: string;

  // Avatar / image
  avatar?: string;
  avatarUrl?: string;
  avatar_url?: string;
  images?: string;

  // Bio
  desc?: string;
  description?: string;

  // Miscellaneous attributes
  gender?: number | string;
  ipLocation?: string;
  ip_location?: string;
  fstatus?: string;

  // Counters, as numbers or strings
  follows?: number | string;
  fans?: number | string;
  interaction?: number | string;
  noteCount?: number | string;
  note_count?: number | string;
}
|
||||
|
||||
/** Follow / fan / interaction counters, each either numeric or textual. */
interface RawInteractions {
  fans?: number | string;
  follows?: number | string;
  interaction?: number | string;
}
|
||||
|
||||
/** Like counters as they appear on a raw note, under four alternative spellings. */
interface RawProfileNoteLikeCounts {
  likedCount?: string;
  liked_count?: string;
  likeCount?: string;
  like_count?: string;
}

/**
 * Raw note entry from the profile state. Fields are declared under several
 * alternative spellings; all of them are optional.
 */
interface RawProfileNote {
  // Note identifier
  id?: string;
  noteId?: string;
  note_id?: string;

  // Per-note security token
  xsecToken?: string;
  xsec_token?: string;

  // Title and text
  displayTitle?: string;
  display_title?: string;
  title?: string;
  desc?: string;

  type?: string;

  /** Cover image, either as a direct URL or as a list of candidates. */
  cover?: {
    url?: string;
    urlPre?: string;
    url_pre?: string;
    urlDefault?: string;
    url_default?: string;
    infoList?: Array<{ url?: string }>;
    info_list?: Array<{ url?: string }>;
  };

  /** Author of the note. */
  user?: {
    userId?: string;
    user_id?: string;
    nickname?: string;
    nick_name?: string;
    avatar?: string;
  };

  // Like counters: nested under either key, or directly on the note.
  interactInfo?: RawProfileNoteLikeCounts;
  interact_info?: RawProfileNoteLikeCounts;
  likedCount?: string;
  liked_count?: string;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// getUserProfile
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Load a Xiaohongshu user's profile page and extract the user's data: basic
 * info, follow/fan/interaction counts, and the notes listed on the page.
 *
 * The page's `__INITIAL_STATE__` is tried first; when it is missing or yields
 * no user info, the rendered DOM is scraped instead.
 *
 * @param page - A Playwright Page managed by BrowserManager.
 * @param userId - The user ID. It is percent-encoded before being placed in
 *   the URL path.
 * @param xsecToken - Security token required to access the profile page, in
 *   decoded form (it is percent-encoded here).
 * @returns A UserProfile object with the user's data.
 */
export async function getUserProfile(
  page: Page,
  userId: string,
  xsecToken: string,
): Promise<UserProfile> {
  // Encode both dynamic parts so that reserved characters ('/', '?', '#', '&')
  // in either value cannot alter the path or query of the request.
  const profilePath = `${USER_PROFILE_BASE_URL}/${encodeURIComponent(userId)}`;
  const url = `${profilePath}?xsec_token=${encodeURIComponent(xsecToken)}&xsec_source=pc_feed`;
  log.debug({ userId, url }, 'Navigating to user profile page');

  // XHS applies stricter bot detection on profile pages than on search pages.
  // Visiting the explore page first establishes a natural session context that
  // allows the subsequent profile navigation to pass the IP-risk check.
  await page.goto('https://www.xiaohongshu.com/explore', { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(1000);

  await page.goto(url, { waitUntil: 'domcontentloaded' });

  // Wait for the profile header; a timeout is logged but not fatal, because
  // the extraction strategies below may still succeed.
  try {
    await page.waitForSelector(SEL.headerContainer, { timeout: 15_000 });
  } catch {
    log.warn({ userId }, 'User profile header not found within timeout, proceeding');
  }

  // Allow render to settle.
  await page.waitForTimeout(1500);

  // Strategy 1: extract from __INITIAL_STATE__.
  const initialState = (await extractInitialState(page)) as RawProfileState | null;
  if (initialState) {
    const fromState = parseProfileFromState(initialState, userId, xsecToken);
    if (fromState) {
      log.info(
        { userId, feedCount: fromState.feeds.length },
        'Extracted user profile from __INITIAL_STATE__',
      );
      return fromState;
    }
    log.debug('__INITIAL_STATE__ found but no profile data extracted, falling back to DOM');
  }

  // Strategy 2: fall back to DOM scraping.
  log.debug({ userId }, 'Falling back to DOM scraping for user profile');
  const fromDom = await scrapeProfileFromDom(page, userId, xsecToken);
  log.info({ userId, feedCount: fromDom.feeds.length }, 'Extracted user profile from DOM');
  return fromDom;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// __INITIAL_STATE__ parsing
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Build a UserProfile from the page's `__INITIAL_STATE__`.
 *
 * User info is taken from the first location that provides it:
 * `user.userPageData.basicInfo`, `user.userInfo`, then `userProfile.userInfo`.
 *
 * @param state - The raw initial state object.
 * @param userId - Used when the state carries no user ID, and as the owner ID
 *   passed to the note parser.
 * @param _xsecToken - Not used by this function.
 * @returns The parsed profile, or `null` when no user info is present.
 */
function parseProfileFromState(
  state: RawProfileState,
  userId: string,
  _xsecToken: string,
): UserProfile | null {
  const pageData = state.user?.userPageData;
  const info = pageData?.basicInfo ?? state.user?.userInfo ?? state.userProfile?.userInfo;
  if (!info) {
    return null;
  }

  const rawAvatar = info.avatar ?? info.avatarUrl ?? info.avatar_url ?? info.images ?? '';

  // Gender: 0=unknown, 1=male, 2=female. Anything else maps to ''.
  let gender = '';
  switch (info.gender) {
    case 1:
    case '1':
      gender = 'male';
      break;
    case 2:
    case '2':
      gender = 'female';
      break;
  }

  // Counters: the dedicated interactions object wins over the user record.
  const counters = pageData?.interactions;
  const follows = toNumber(counters?.follows ?? info.follows ?? 0);
  const fans = toNumber(counters?.fans ?? info.fans ?? 0);
  const interaction = toNumber(counters?.interaction ?? info.interaction ?? 0);

  // Notes on the profile page; entries the note parser rejects are dropped.
  const feeds: Feed[] = [];
  for (const note of pageData?.notes ?? state.userProfile?.notes ?? []) {
    const feed = parseProfileNote(note, userId);
    if (feed !== null) {
      feeds.push(feed);
    }
  }

  return {
    id: info.userId ?? info.user_id ?? userId,
    nickname: info.nickname ?? info.nick_name ?? info.nickName ?? '',
    avatar: rawAvatar ? ensureHttps(rawAvatar) : '',
    description: info.desc ?? info.description ?? '',
    gender,
    ipLocation: info.ipLocation ?? info.ip_location ?? '',
    follows,
    fans,
    interaction,
    feeds,
  };
}
|
||||
|
||||
/**
 * Convert one raw profile note into a Feed.
 *
 * @param raw - The raw note entry.
 * @param ownerUserId - User ID to use when the note carries no author ID.
 * @returns The Feed, or `null` when the note has no identifier.
 */
function parseProfileNote(
  raw: RawProfileNote,
  ownerUserId: string,
): Feed | null {
  const id = raw.id ?? raw.noteId ?? raw.note_id ?? '';
  if (!id) {
    return null;
  }

  // Any type string containing "video" (case-insensitive) marks a video note.
  const isVideo = (raw.type ?? '').toLowerCase().includes('video');
  const type: 'normal' | 'video' = isVideo ? 'video' : 'normal';

  // Cover image: prefer a direct URL, then the first entry of the info list.
  let coverUrl = '';
  const cover = raw.cover;
  if (cover) {
    const direct =
      cover.url ?? cover.urlPre ?? cover.url_pre ?? cover.urlDefault ?? cover.url_default ?? '';
    const firstListed = (cover.infoList ?? cover.info_list)?.[0]?.url;
    const chosen = direct || firstListed || '';
    coverUrl = chosen ? ensureHttps(chosen) : '';
  }

  // Like count: nested interaction info first, then fields on the note itself.
  const likes = raw.interactInfo ?? raw.interact_info;
  const likeCount = parseCountString(
    likes?.likedCount ??
      likes?.liked_count ??
      likes?.likeCount ??
      likes?.like_count ??
      raw.likedCount ??
      raw.liked_count ??
      '0',
  );

  const author = raw.user;

  return {
    id,
    xsecToken: raw.xsecToken ?? raw.xsec_token ?? '',
    title: raw.displayTitle ?? raw.display_title ?? raw.title ?? '',
    description: raw.desc ?? '',
    type,
    coverUrl,
    likeCount,
    user: {
      id: author?.userId ?? author?.user_id ?? ownerUserId,
      nickname: author?.nickname ?? author?.nick_name ?? '',
      avatar: author?.avatar ?? '',
    },
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// DOM scraping fallback — uses Playwright Node-side API exclusively
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Scrape user profile data from the rendered DOM using Playwright's
 * Node-side APIs to avoid needing DOM lib types.
 *
 * Every lookup is best-effort: a missing element yields an empty string (or
 * a zero count) instead of an error.
 *
 * @param page - The page currently showing the profile.
 * @param userId - Used as the profile ID and as the author ID of every feed.
 * @param xsecToken - Token used for a feed whose link carries none.
 * @returns The profile assembled from the DOM.
 */
async function scrapeProfileFromDom(
  page: Page,
  userId: string,
  xsecToken: string,
): Promise<UserProfile> {
  // Trimmed text of the first element matching `selector`, or '' if absent.
  const readText = (selector: string): Promise<string> =>
    page.$eval(selector, (el) => el.textContent?.trim() ?? '').catch(() => '');

  // A token captured from an href is percent-encoded, while getUserProfile
  // percent-encodes the tokens it receives; decode here so the value is not
  // encoded twice. Malformed escapes fall back to the raw text.
  const decodeToken = (raw: string): string => {
    try {
      return decodeURIComponent(raw);
    } catch {
      return raw;
    }
  };

  const nickname = await readText(SEL.nickname);

  const avatar = await page
    .$eval(SEL.avatar, (img) => img.getAttribute('src') ?? '')
    .catch(() => '');

  // Description / bio
  const description = await readText(SEL.description);

  // Gender — inferred from the gender icon's class list.
  const gender = await page
    .$eval(SEL.gender, (el) => {
      // Read the attribute instead of `className`: on SVG elements
      // `className` is an SVGAnimatedString, which has no toLowerCase().
      const cls = (el.getAttribute('class') ?? '').toLowerCase();
      // "female" contains "male", so it has to be tested first.
      if (cls.includes('female')) return 'female';
      if (cls.includes('male')) return 'male';
      return '';
    })
    .catch(() => '');

  const ipLocation = await readText(SEL.ipLocation);

  // Follower / following / interaction counts.
  // These are typically in a row of .data-item elements.
  const dataCounts = await page
    .$$eval(SEL.followCount, (items) =>
      items.map((item) => item.querySelector('.count')?.textContent?.trim() ?? '0'),
    )
    .catch(() => [] as string[]);

  const follows = parseCountString(dataCounts[0] ?? '0');
  const fans = parseCountString(dataCounts[1] ?? '0');
  const interaction = parseCountString(dataCounts[2] ?? '0');

  // Patterns are loop-invariant, so build them once.
  const noteIdPattern =
    /\/(?:explore|search_result)\/([a-f0-9]+)|\/user\/profile\/[^/]+\/([a-f0-9]+)/;
  const tokenPattern = /xsec_token=([^&]+)/;

  // Scrape feed items on the profile page.
  const feedElements = await page.$$(SEL.feedItem);
  const feeds: Feed[] = [];

  for (const card of feedElements) {
    try {
      const href = await card
        .$eval('a.cover', (el) => el.getAttribute('href') ?? '')
        .catch(() => '');

      const idMatch = href.match(noteIdPattern);
      const id = idMatch?.[1] ?? idMatch?.[2] ?? '';
      // Cards whose link holds no recognisable note ID are skipped.
      if (!id) continue;

      const rawToken = href.match(tokenPattern)?.[1] ?? '';
      const noteXsecToken = rawToken ? decodeToken(rawToken) : '';

      const coverUrl = await card
        .$eval('a.cover img', (el) => el.getAttribute('src') ?? el.getAttribute('data-src') ?? '')
        .catch(() => '');

      const feedTitle = await card
        .$eval('.footer .title', (el) => el.textContent?.trim() ?? '')
        .catch(() => '');

      const likeText = await card
        .$eval('.footer .like-wrapper .count', (el) => el.textContent?.trim() ?? '0')
        .catch(() => '0');

      const hasVideoIcon = await card
        .$('.play-icon')
        .then((el) => el !== null)
        .catch(() => false);

      feeds.push({
        id,
        xsecToken: noteXsecToken || xsecToken,
        title: feedTitle,
        description: '',
        type: hasVideoIcon ? 'video' : 'normal',
        coverUrl,
        likeCount: parseCountString(likeText),
        user: { id: userId, nickname: '', avatar: '' },
      });
    } catch {
      // Best-effort: a card that cannot be read is dropped, not fatal.
      continue;
    }
  }

  return {
    id: userId,
    nickname,
    avatar,
    description,
    gender,
    ipLocation,
    follows,
    fans,
    interaction,
    feeds,
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Normalise a count to a number: numbers are returned unchanged, strings are
 * handed to `parseCountString`.
 */
function toNumber(val: string | number): number {
  return typeof val === 'number' ? val : parseCountString(val);
}
|
||||
@@ -0,0 +1,66 @@
|
||||
import fs from 'node:fs/promises';
|
||||
import os from 'node:os';
|
||||
import path from 'node:path';
|
||||
|
||||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
import { NotificationStateStore } from '../src/platforms/xiaohongshu/notification-state.js';
|
||||
import type { CommentNotification } from '../src/platforms/xiaohongshu/types.js';
|
||||
|
||||
/**
 * Build a CommentNotification fixture; any field given in `overrides`
 * replaces the corresponding default.
 */
function makeNotification(overrides?: Partial<CommentNotification>): CommentNotification {
  const defaults: CommentNotification = {
    userId: 'u1',
    nickname: 'tester',
    avatar: 'https://example.com/a.png',
    content: '你好',
    type: '评论了你的笔记',
    time: '1分钟前',
    feedId: 'feed123',
    xsecToken: 'token123',
    noteImage: 'https://example.com/note.png',
  };
  return { ...defaults, ...overrides };
}
|
||||
|
||||
describe('notification-state store', () => {
  it('upserts notifications and tracks status transitions', async () => {
    // Each run gets its own scratch directory, removed again in `finally`.
    const scratchDir = await fs.mkdtemp(path.join(os.tmpdir(), 'social-mcp-notif-'));
    try {
      const store = new NotificationStateStore(scratchDir, 'test.db');
      const notification = makeNotification();

      // The first upsert is expected to insert, the repeat to update.
      expect(store.upsertNotifications([notification])).toEqual({
        fetched: 1,
        inserted: 1,
        updated: 0,
      });
      expect(store.upsertNotifications([notification])).toEqual({
        fetched: 1,
        inserted: 0,
        updated: 1,
      });

      const fresh = store.listByStatuses(['new'], 10);
      expect(fresh).toHaveLength(1);
      expect(fresh[0]?.notification.userId).toBe('u1');

      const fingerprint = store.findOpenFingerprint('u1', '你好');
      expect(fingerprint).toBeTypeOf('string');
      if (!fingerprint) {
        throw new Error('fingerprint should not be null');
      }

      // new -> pending
      store.markPending(fingerprint);
      expect(store.listByStatuses(['pending'], 10)).toHaveLength(1);

      // pending -> failed
      store.markFailed(fingerprint, 'network error');
      const failedRows = store.listByStatuses(['failed'], 10);
      expect(failedRows).toHaveLength(1);
      expect(failedRows[0]?.retryCount).toBe(1);
      expect(failedRows[0]?.errorMessage).toBe('network error');

      // failed -> replied
      store.markReplied(fingerprint, '收到,感谢反馈');
      const repliedRows = store.listByStatuses(['replied'], 10);
      expect(repliedRows).toHaveLength(1);
      expect(repliedRows[0]?.replyContent).toBe('收到,感谢反馈');
    } finally {
      await fs.rm(scratchDir, { recursive: true, force: true });
    }
  });
});
|
||||
@@ -0,0 +1,7 @@
|
||||
{
  "extends": "../../tsconfig.base.json",
  "compilerOptions": {
    "outDir": "dist"
  },
  "include": ["src"]
}
|
||||
@@ -0,0 +1,24 @@
|
||||
import { defineConfig } from 'tsup';
|
||||
|
||||
// Modules listed under tsup's `external` option.
const externalModules = [
  '@modelcontextprotocol/sdk',
  /^@modelcontextprotocol\/sdk\//,
  'express',
  'pino',
  'pino-pretty',
  'rebrowser-playwright',
  'chromium-bidi/lib/cjs/bidiMapper/BidiMapper',
  'chromium-bidi/lib/cjs/cdp/CdpConnection',
];

export default defineConfig({
  // Input and output
  entry: ['src/main.ts'],
  outDir: 'dist',
  format: ['esm'],
  target: 'node22',

  // Dependency handling: the workspace core package is matched by
  // `noExternal`, the modules above by `external`.
  noExternal: [/^@social\/core/],
  external: externalModules,

  // Build switches
  clean: true,
  sourcemap: true,
  dts: false,
  splitting: false,
  shims: false,
});
|
||||
@@ -0,0 +1,14 @@
|
||||
import { defineConfig } from 'vitest/config';
|
||||
import path from 'node:path';
|
||||
|
||||
// Source directory of the workspace core package, resolved from this file's directory.
const coreSourceDir = path.resolve(__dirname, '../../packages/core/src');

export default defineConfig({
  test: {
    environment: 'node',
    include: ['test/**/*.test.ts'],
  },
  resolve: {
    // Point `@social/core` imports at the package sources.
    alias: {
      '@social/core': coreSourceDir,
    },
  },
});
|
||||
Reference in New Issue
Block a user