重构为Monorepo:拆分xhs/xhh应用与core包并完成双服务部署改造
This commit is contained in:
@@ -0,0 +1,5 @@
|
||||
import { startServerWithPlugins } from '@social/core/server/bootstrap.js';
|
||||
import { xiaoheihePlugin } from './platforms/xiaoheihe/index.js';
|
||||
|
||||
// Entry point: hands the core bootstrap a plugin list containing only the Xiaoheihe plugin.
startServerWithPlugins([xiaoheihePlugin]);
|
||||
|
||||
@@ -0,0 +1,187 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '@social/core/utils/logger.js';
|
||||
import { XHH_SELECTORS } from './selectors.js';
|
||||
import { detectCaptchaText } from './extractors.js';
|
||||
|
||||
// Child logger created with the binding `module: 'xhh-comment'`.
const log = logger.child({ module: 'xhh-comment' });
|
||||
|
||||
/**
 * Builds the absolute detail-page URL for a post.
 *
 * @param linkId - Post identifier; it is percent-encoded before being appended to the path.
 * @returns URL of the form `https://www.xiaoheihe.cn/app/bbs/link/<encoded id>`.
 */
function buildDetailUrl(linkId: string): string {
  const encodedId = encodeURIComponent(linkId);
  return 'https://www.xiaoheihe.cn/app/bbs/link/' + encodedId;
}
|
||||
|
||||
/**
 * Opens a post's detail page and submits a top-level comment.
 *
 * Steps: navigate, bail out if the page text looks like a captcha, fill the
 * comment input, click the submit control, then look for a rendered comment
 * whose text contains the first 24 characters of `content` to recover its id.
 *
 * @param page - Browser page used for navigation and DOM access.
 * @param linkId - Identifier of the post to comment on.
 * @param content - Comment text to submit.
 * @returns `success: false` when the input could not be filled or no submit
 *   control could be clicked; otherwise `success: true`, plus `comment_id`
 *   when a matching comment element exposing an id was found.
 * @throws Error starting with `CAPTCHA_REQUIRED` when captcha text is detected.
 */
export async function postComment(
  page: Page,
  linkId: string,
  content: string,
): Promise<{ success: boolean; comment_id?: string }> {
  await page.goto(buildDetailUrl(linkId), { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(1_000);

  const bodyText = await page.textContent('body').catch(() => '');
  if (bodyText && detectCaptchaText(bodyText)) {
    throw new Error('CAPTCHA_REQUIRED: captcha detected on comment page');
  }

  const filled = await fillCommentInput(page, content);
  if (!filled) {
    return { success: false };
  }

  const clicked = await clickFirstVisible(page, XHH_SELECTORS.detail.commentSubmit);
  if (!clicked) {
    return { success: false };
  }

  // Fixed pause before the DOM is searched for the newly posted comment.
  await page.waitForTimeout(1_500);

  const snippet = content.slice(0, 24);
  const commentId = await page.evaluate(
    ({ selectors, contentLike }: { selectors: typeof XHH_SELECTORS; contentLike: string }) => {
      const itemSelector = selectors.detail.commentItem.join(', ');
      for (const node of document.querySelectorAll<HTMLElement>(itemSelector)) {
        if (!node.textContent?.includes(contentLike)) continue;
        // First comment containing the snippet wins; its id may still be empty.
        return node.getAttribute('data-comment-id') || node.getAttribute('comment-id') || node.id || '';
      }
      return '';
    },
    { selectors: XHH_SELECTORS, contentLike: snippet },
  );

  const outcome: { success: boolean; comment_id?: string } = { success: true };
  if (commentId) {
    outcome.comment_id = commentId;
  }
  return outcome;
}
|
||||
|
||||
/**
 * Opens a post's detail page and submits a reply to one of its comments.
 *
 * The target comment is the first comment element whose id attribute equals
 * `commentId` or whose outer HTML contains `commentId`. Inside it, the first
 * button-like element whose text contains "回复" or whose class contains
 * "reply" is clicked to open the reply box.
 *
 * @param page - Browser page used for navigation and DOM access.
 * @param linkId - Identifier of the post that holds the comment.
 * @param commentId - Identifier of the comment being replied to.
 * @param content - Reply text to submit.
 * @returns `success: false` when the target comment or its reply button is
 *   missing, the input could not be filled, or no submit control could be
 *   clicked; otherwise `success: true`, plus `reply_id` when the new reply was
 *   found under the target comment with an id.
 * @throws Error starting with `CAPTCHA_REQUIRED` when captcha text is detected.
 */
export async function replyComment(
  page: Page,
  linkId: string,
  commentId: string,
  content: string,
): Promise<{ success: boolean; reply_id?: string }> {
  await page.goto(buildDetailUrl(linkId), { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(1_000);

  const text = await page.textContent('body').catch(() => '');
  if (text && detectCaptchaText(text)) {
    throw new Error('CAPTCHA_REQUIRED: captcha detected on reply page');
  }

  const replyOpened = await page.evaluate(
    ({ selectors, targetCommentId }) => {
      const comments = [...document.querySelectorAll<HTMLElement>(selectors.detail.commentItem.join(', '))];
      const target = comments.find((node) => {
        const id =
          node.getAttribute('data-comment-id') ||
          node.getAttribute('comment-id') ||
          node.id ||
          '';
        if (id === targetCommentId) return true;
        return node.outerHTML.includes(targetCommentId);
      });
      if (!target) return false;

      const replyBtn = [
        ...target.querySelectorAll<HTMLElement>('button, [role="button"], .reply-btn, .comment-reply'),
      ].find((node) => {
        const label = (node.textContent ?? '').trim();
        const cls = node.className.toString().toLowerCase();
        return label.includes('回复') || cls.includes('reply');
      });
      if (!replyBtn) return false;

      replyBtn.click();
      return true;
    },
    { selectors: XHH_SELECTORS, targetCommentId: commentId },
  );

  // Without an opened reply box the text below would go into the page-level
  // input and be posted as a top-level comment, so stop here instead.
  if (!replyOpened) {
    log.warn({ linkId, commentId }, 'reply target comment or reply button not found');
    return { success: false };
  }
  await page.waitForTimeout(500);

  const ok = await fillCommentInput(page, content);
  if (!ok) return { success: false };

  const submitted = await clickFirstVisible(page, XHH_SELECTORS.detail.commentSubmit);
  if (!submitted) return { success: false };

  // Fixed pause before the DOM is searched for the newly posted reply.
  await page.waitForTimeout(1_500);

  const replyId = await page.evaluate(
    ({
      selectors,
      targetCommentId,
      contentLike,
    }: {
      selectors: typeof XHH_SELECTORS;
      targetCommentId: string;
      contentLike: string;
    }) => {
      const comments = [...document.querySelectorAll<HTMLElement>(selectors.detail.commentItem.join(', '))];
      const target = comments.find((node) => {
        const id =
          node.getAttribute('data-comment-id') ||
          node.getAttribute('comment-id') ||
          node.id ||
          '';
        if (id === targetCommentId) return true;
        return node.outerHTML.includes(targetCommentId);
      });
      if (!target) return '';

      const replies = [...target.querySelectorAll<HTMLElement>(selectors.detail.subCommentItem.join(', '))];
      const hit = replies.find((node) => node.textContent?.includes(contentLike));
      if (!hit) return '';

      return hit.getAttribute('data-comment-id') || hit.getAttribute('comment-id') || hit.id || '';
    },
    { selectors: XHH_SELECTORS, targetCommentId: commentId, contentLike: content.slice(0, 24) },
  );

  return {
    success: true,
    ...(replyId ? { reply_id: replyId } : {}),
  };
}
|
||||
|
||||
/**
 * Writes `content` into the first usable comment input on the page.
 *
 * Each selector in `XHH_SELECTORS.detail.commentInput` is tried in order: the
 * element is clicked, then its value (textarea/input) or text content
 * (content-editable element) is set in the page and a bubbling `input` event
 * is dispatched.
 *
 * @param page - Browser page holding the comment form.
 * @param content - Text to place into the input.
 * @returns `true` once one selector was filled, `false` if none worked.
 */
async function fillCommentInput(page: Page, content: string): Promise<boolean> {
  for (const selector of XHH_SELECTORS.detail.commentInput) {
    const handle = await page.$(selector).catch(() => null);
    if (handle) {
      // A failed click is ignored; the value is set through the DOM below anyway.
      await handle.click().catch(() => {});
      await page.waitForTimeout(200);

      const written = await page
        .evaluate(
          ({ selector: css, content: value }) => {
            const el = document.querySelector(css);
            if (!el) return false;

            if (el instanceof HTMLTextAreaElement || el instanceof HTMLInputElement) {
              el.value = value;
            } else if (el instanceof HTMLElement && el.isContentEditable) {
              el.focus();
              el.textContent = value;
            } else {
              return false;
            }

            el.dispatchEvent(new Event('input', { bubbles: true }));
            return true;
          },
          { selector, content },
        )
        .catch(() => false);

      if (written) {
        return true;
      }
    }
  }

  return false;
}
|
||||
|
||||
/**
 * Clicks the first element that can be clicked among the given selectors.
 *
 * Selectors are tried in order; each click attempt is limited to 2 seconds and
 * a failure simply moves on to the next selector.
 *
 * @param page - Browser page to act on.
 * @param selectors - Candidate selectors, in priority order.
 * @returns `true` after the first successful click; `false` (after logging a
 *   warning) when every selector failed.
 */
async function clickFirstVisible(page: Page, selectors: readonly string[]): Promise<boolean> {
  for (const candidate of selectors) {
    try {
      await page.locator(candidate).first().click({ timeout: 2_000 });
      return true;
    } catch {
      // Not clickable within the time limit; fall through to the next candidate.
    }
  }

  log.warn({ selectors }, 'no clickable submit button');
  return false;
}
|
||||
@@ -0,0 +1,66 @@
|
||||
/** Decoded contents of a keyset pagination cursor. */
export interface KeysetCursorPayload {
  /** Key of the last item of the previous page. */
  key: string;
}
|
||||
|
||||
/** One page of results produced by keyset pagination. */
export interface KeysetPage<T> {
  /** Items belonging to this page. */
  items: Array<T>;
  /** Whether more items exist after this page. */
  hasMore: boolean;
  /** Encoded cursor for the next page, when there is one. */
  nextCursor?: string;
}
|
||||
|
||||
/**
 * Serialises a cursor payload into an opaque string.
 *
 * @param payload - Cursor contents to encode.
 * @returns The payload as JSON, UTF-8 encoded and rendered as base64url.
 */
export function encodeKeysetCursor(payload: KeysetCursorPayload): string {
  const json = JSON.stringify(payload);
  const bytes = Buffer.from(json, 'utf8');
  return bytes.toString('base64url');
}
|
||||
|
||||
/**
 * Parses a cursor string produced by base64url-encoding a JSON payload.
 *
 * @param cursor - Encoded cursor; `undefined` or an empty string means "no cursor".
 * @returns The decoded payload, or `undefined` when no cursor was supplied.
 * @throws Error `Invalid cursor for keyset pagination` when the cursor cannot
 *   be decoded, is not JSON, or lacks a non-empty string `key`.
 */
export function decodeKeysetCursor(cursor?: string): KeysetCursorPayload | undefined {
  if (!cursor) {
    return undefined;
  }

  try {
    const decoded = Buffer.from(cursor, 'base64url').toString('utf8');
    const { key } = JSON.parse(decoded) as { key?: unknown };

    if (typeof key === 'string' && key.length > 0) {
      return { key };
    }
    // Caught right below, so callers only ever see the generic message.
    throw new Error('Invalid keyset cursor payload');
  } catch {
    throw new Error('Invalid cursor for keyset pagination');
  }
}
|
||||
|
||||
/**
 * Returns one page of `items`, starting right after the item identified by `cursor`.
 *
 * @param items - Full ordered list to paginate.
 * @param maxCount - Maximum page size. Fractions are floored; values below 1
 *   and `NaN` yield an empty page with `hasMore: false`.
 * @param cursor - Decoded cursor of the previous page, or `undefined` for the
 *   first page. A key that matches no item restarts from the first item.
 * @param keyOf - Extracts the pagination key of an item.
 * @returns The page items, whether more items follow, and — only when more
 *   items follow — the encoded cursor for the next page.
 */
export function paginateByKeyset<T>(
  items: T[],
  maxCount: number,
  cursor: KeysetCursorPayload | undefined,
  keyOf: (item: T) => string,
): KeysetPage<T> {
  // Flooring keeps a fractional limit from producing an empty page that still
  // reports `hasMore: true`; the negated comparison also rejects NaN.
  const limit = Math.floor(maxCount);
  if (!(limit >= 1)) {
    return { items: [], hasMore: false };
  }

  // findIndex yields -1 for an unknown key, so `+ 1` falls back to index 0.
  const start = cursor ? items.findIndex((item) => keyOf(item) === cursor.key) + 1 : 0;

  const pageItems = items.slice(start, start + limit);
  const hasMore = start + pageItems.length < items.length;

  if (!hasMore || pageItems.length === 0) {
    return { items: pageItems, hasMore };
  }

  const lastItem = pageItems[pageItems.length - 1] as T;
  return {
    items: pageItems,
    hasMore,
    nextCursor: encodeKeysetCursor({ key: keyOf(lastItem) }),
  };
}
|
||||
@@ -0,0 +1,110 @@
|
||||
import type { Feed } from './types.js';
|
||||
|
||||
/**
 * Converts a displayed counter into a number.
 *
 * Numbers are returned as-is when finite. Strings are trimmed, stripped of
 * commas and of one trailing `+`, then parsed. A final unit character scales
 * the value: `万` or `w` (x10,000), `亿` (x100,000,000), `k` (x1,000); Latin
 * units are case-insensitive. Anything else is parsed as a base-10 integer
 * prefix.
 *
 * @param raw - Counter as a number, display string, or nullish value.
 * @returns The parsed count, or `0` for empty, nullish or unparsable input.
 */
export function parseCountString(raw: string | number | null | undefined): number {
  if (typeof raw === 'number') {
    return Number.isFinite(raw) ? raw : 0;
  }

  const text = (raw ?? '').toString().trim().replace(/,/g, '');
  if (!text) return 0;

  // "1.2万+" and "999+" mean "at least this many"; the marker carries no digits.
  const body = text.replace(/\+$/, '').trim();

  const unit = body.slice(-1).toLowerCase();
  let multiplier: number | undefined;
  if (unit === '万' || unit === 'w') multiplier = 10_000;
  else if (unit === '亿') multiplier = 100_000_000;
  else if (unit === 'k') multiplier = 1_000;

  if (multiplier !== undefined) {
    const scaled = Number.parseFloat(body.slice(0, -1));
    return Number.isFinite(scaled) ? Math.round(scaled * multiplier) : 0;
  }

  const intNum = Number.parseInt(body, 10);
  return Number.isNaN(intNum) ? 0 : intNum;
}
|
||||
|
||||
/**
 * Reports whether a piece of page text mentions a captcha.
 *
 * @param text - Text to inspect; it is lower-cased before matching.
 * @returns `true` when any known captcha marker occurs in the text.
 */
export function detectCaptchaText(text: string): boolean {
  const lowered = text.toLowerCase();
  const markers = ['captcha', 'show_captcha', '验证码', 'tencentcaptcha'];
  return markers.some((marker) => lowered.includes(marker));
}
|
||||
|
||||
/**
 * Extracts a post id from a URL-like string.
 *
 * Accepts absolute http(s) URLs, site-relative paths (starting with `/`) and
 * scheme-less hosts. The id is taken from an `/app/bbs/link/<digits>` path
 * segment, otherwise from the `link_id` or `linkid` query parameter.
 *
 * @param rawUrl - URL, path or host string to inspect.
 * @returns The id, or `undefined` when the input is blank, unparsable, or has no id.
 */
export function extractLinkIdFromUrl(rawUrl: string): string | undefined {
  const candidate = rawUrl.trim();
  if (candidate.length === 0) return undefined;

  let absolute: string;
  if (/^https?:\/\//i.test(candidate)) {
    absolute = candidate;
  } else if (candidate.startsWith('/')) {
    absolute = `https://www.xiaoheihe.cn${candidate}`;
  } else {
    absolute = `https://${candidate}`;
  }

  let parsed: URL;
  try {
    parsed = new URL(absolute);
  } catch {
    return undefined;
  }

  const fromPath = parsed.pathname.match(/\/app\/bbs\/link\/(\d+)/)?.[1];
  if (fromPath) return fromPath;

  const fromQuery = parsed.searchParams.get('link_id') ?? parsed.searchParams.get('linkid');
  return fromQuery || undefined;
}
|
||||
|
||||
/**
 * Extracts a user id from a URL-like string.
 *
 * Accepts absolute http(s) URLs, site-relative paths (starting with `/`) and
 * scheme-less hosts. The id is taken from an `/app/user/profile/<digits>`
 * path segment, otherwise from the `userid` or `user_id` query parameter.
 *
 * @param rawUrl - URL, path or host string to inspect.
 * @returns The id, or `undefined` when the input is blank, unparsable, or has no id.
 */
export function extractUserIdFromUrl(rawUrl: string): string | undefined {
  const input = rawUrl.trim();
  if (input === '') return undefined;

  const hasScheme = /^https?:\/\//i.test(input);
  const isSitePath = input.startsWith('/');

  try {
    let target: URL;
    if (hasScheme) {
      target = new URL(input);
    } else if (isSitePath) {
      target = new URL(`https://www.xiaoheihe.cn${input}`);
    } else {
      target = new URL(`https://${input}`);
    }

    const profileMatch = target.pathname.match(/\/app\/user\/profile\/(\d+)/);
    if (profileMatch?.[1]) {
      return profileMatch[1];
    }

    const params = target.searchParams;
    const fromQuery = params.get('userid') ?? params.get('user_id');
    return fromQuery ? fromQuery : undefined;
  } catch {
    return undefined;
  }
}
|
||||
|
||||
/**
 * Builds placeholder feed entries from post links found in raw HTML.
 *
 * Only double-quoted `href` attributes holding exactly `/app/bbs/link/<digits>`
 * are considered. Each distinct post id yields one entry, in order of first
 * appearance, with every field except `id` and `linkUrl` left empty or zero.
 *
 * @param html - HTML source to scan.
 * @returns One feed per distinct post id.
 */
export function parseFeedsFromHtmlSnapshot(html: string): Feed[] {
  // A Map keeps insertion order, so it serves as both the seen-set and the output list.
  const feedsById = new Map<string, Feed>();

  for (const match of html.matchAll(/href="(\/app\/bbs\/link\/\d+)"/g)) {
    const path = match[1];
    if (!path) continue;

    const linkId = extractLinkIdFromUrl(path);
    if (!linkId || feedsById.has(linkId)) continue;

    feedsById.set(linkId, {
      id: linkId,
      title: '',
      description: '',
      coverUrl: '',
      likeCount: 0,
      commentCount: 0,
      user: { id: '', nickname: '', avatar: '' },
      linkUrl: `https://www.xiaoheihe.cn${path}`,
    });
  }

  return [...feedsById.values()];
}
|
||||
|
||||
/**
 * Picks the first argument that still has content after trimming.
 *
 * @param values - Candidates in priority order; `null` and `undefined` are skipped.
 * @returns The trimmed text of the first non-blank candidate, or `''` if there is none.
 */
export function firstNonEmpty(...values: Array<string | null | undefined>): string {
  for (let index = 0; index < values.length; index += 1) {
    const candidate = values[index];
    if (candidate == null) continue;

    const cleaned = candidate.trim();
    if (cleaned !== '') {
      return cleaned;
    }
  }
  return '';
}
|
||||
@@ -0,0 +1,266 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '@social/core/utils/logger.js';
|
||||
import { XHH_SELECTORS } from './selectors.js';
|
||||
import type { Comment, FeedDetail } from './types.js';
|
||||
import { detectCaptchaText, firstNonEmpty, parseCountString } from './extractors.js';
|
||||
|
||||
// Child logger created with the binding `module: 'xhh-feed-detail'`.
const log = logger.child({ module: 'xhh-feed-detail' });
|
||||
|
||||
/**
 * Builds the absolute detail-page URL for a post.
 *
 * @param linkId - Post identifier; it is percent-encoded before being appended to the path.
 * @returns URL of the form `https://www.xiaoheihe.cn/app/bbs/link/<encoded id>`.
 */
function buildDetailUrl(linkId: string): string {
  return ['https://www.xiaoheihe.cn/app/bbs/link', encodeURIComponent(linkId)].join('/');
}
|
||||
|
||||
/** Comment data as read from the page, before counts and ids are normalised. */
interface RawComment {
  /** Id read from the comment element; empty when the element exposes none. */
  id: string;
  /** Id of the parent comment; set only on sub-comments. */
  parentId?: string;
  /** Digits taken from the author's profile link, or empty. */
  userId: string;
  /** Author link text. */
  nickname: string;
  /** Avatar image URL, or empty. */
  avatar: string;
  /** Comment text. */
  content: string;
  /** Like counter as displayed (text) or already numeric. */
  likeCount: number | string;
  /** Timestamp text exactly as displayed. */
  createTime: string;
  /** Replies nested under this comment. */
  subComments: RawComment[];
}
|
||||
|
||||
/** Post detail data as read from the page, before counts are parsed into numbers. */
interface RawDetail {
  /** Post title text. */
  title: string;
  /** Post body/description text. */
  description: string;
  /** Image URLs found in the post. */
  images: string[];
  /** Like counter as displayed (text) or already numeric. */
  likeCount: number | string;
  /** Favorite counter as displayed (text) or already numeric. */
  favoriteCount: number | string;
  /** Comment counter as displayed (text) or already numeric. */
  commentCount: number | string;
  /** Whether the like control appears active. */
  isLiked: boolean;
  /** Whether the favorite control appears active. */
  isFavorited: boolean;
  /** Digits taken from the author's profile link, or empty. */
  userId: string;
  /** Author display name. */
  nickname: string;
  /** Author avatar URL, or empty. */
  avatar: string;
  /** Top-level comments with their nested replies. */
  comments: RawComment[];
}
|
||||
|
||||
/**
 * Loads a post's detail page and extracts the post, its author and its comments.
 *
 * Everything is read from the DOM in a single `page.evaluate` call using the
 * selectors in `XHH_SELECTORS.detail`; counters are converted to numbers
 * afterwards with `parseCountString`.
 *
 * @param page - Browser page used for navigation and DOM access.
 * @param linkId - Identifier of the post to load.
 * @returns The normalised post detail including comments and their replies.
 * @throws Error starting with `CAPTCHA_REQUIRED` when captcha text is detected.
 * @throws Error `waiting for selector: xhh detail not found` when neither a
 *   title nor a description could be extracted.
 */
export async function getFeedDetail(page: Page, linkId: string): Promise<FeedDetail> {
  await page.goto(buildDetailUrl(linkId), { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(1_200);

  const pageText = await page.textContent('body').catch(() => '');
  if (pageText && detectCaptchaText(pageText)) {
    throw new Error('CAPTCHA_REQUIRED: captcha detected on feed detail');
  }

  // The callback runs inside the page, so every helper it needs is declared within it.
  const raw = (await page.evaluate((selectors: typeof XHH_SELECTORS) => {
    const detailSelectors = selectors.detail;

    const textIn = (scope: ParentNode, selector: string): string =>
      (scope.querySelector(selector)?.textContent ?? '').trim();

    const firstText = (candidates: readonly string[]): string => {
      for (const candidate of candidates) {
        const found = textIn(document, candidate);
        if (found) return found;
      }
      return '';
    };

    const attrOf = (selector: string, attr: string): string =>
      (document.querySelector(selector)?.getAttribute(attr) ?? '').trim();

    const firstElement = (candidates: readonly string[]): Element | undefined =>
      candidates.map((sel: string) => document.querySelector(sel)).find(Boolean) as Element | undefined;

    // A control counts as active when pressed, carrying an "active" class, or containing "filled" markup.
    const looksActive = (button: Element | undefined): boolean =>
      Boolean(button?.getAttribute('aria-pressed') === 'true') ||
      Boolean(button?.className.toString().toLowerCase().includes('active')) ||
      Boolean(button?.innerHTML.toLowerCase().includes('filled'));

    const profileIdOf = (href: string): string => href.match(/\/app\/user\/profile\/(\d+)/)?.[1] ?? '';

    // Reads the fields shared by top-level comments and replies from one element.
    const readComment = (el: HTMLElement): RawComment => {
      const author = el.querySelector(detailSelectors.commentAuthor);
      return {
        id: el.getAttribute('data-comment-id') || el.getAttribute('comment-id') || el.id || '',
        userId: profileIdOf(author?.getAttribute('href') ?? ''),
        nickname: (author?.textContent ?? '').trim(),
        avatar: (el.querySelector(detailSelectors.commentAvatar) as HTMLImageElement | null)?.src ?? '',
        content: textIn(el, detailSelectors.commentContent),
        likeCount: textIn(el, detailSelectors.commentLikeCount),
        createTime: textIn(el, detailSelectors.commentTime),
        subComments: [],
      };
    };

    const title = firstText(detailSelectors.title);
    const description = firstText(detailSelectors.description);
    const images = [...document.querySelectorAll<HTMLImageElement>(detailSelectors.image)]
      .map((img) => img.src)
      .filter(Boolean);
    const likeCount = textIn(document, detailSelectors.likeCount);
    const favoriteCount = textIn(document, detailSelectors.favoriteCount);
    const commentCount = textIn(document, detailSelectors.commentCount);

    const isLiked = looksActive(firstElement(detailSelectors.likeButton));
    const isFavorited = looksActive(firstElement(detailSelectors.favoriteButton));

    const userLink = attrOf(detailSelectors.userLink, 'href');
    const nickname = firstText(detailSelectors.userName);
    const avatar = attrOf(detailSelectors.userAvatar, 'src');

    const commentSelector = detailSelectors.commentItem.join(', ');
    const subSelector = detailSelectors.subCommentItem.join(', ');
    const comments: RawComment[] = [];

    for (const node of document.querySelectorAll<HTMLElement>(commentSelector)) {
      const comment = readComment(node);
      for (const subNode of node.querySelectorAll<HTMLElement>(subSelector)) {
        const reply = readComment(subNode);
        reply.parentId = comment.id || undefined;
        comment.subComments.push(reply);
      }
      comments.push(comment);
    }

    return {
      title,
      description,
      images,
      likeCount,
      favoriteCount,
      commentCount,
      isLiked,
      isFavorited,
      userId: profileIdOf(userLink),
      nickname,
      avatar,
      comments,
    };
  }, XHH_SELECTORS)) as RawDetail;

  const author = {
    id: raw.userId,
    nickname: raw.nickname,
    avatar: raw.avatar,
  };

  const detail: FeedDetail = {
    id: linkId,
    title: raw.title,
    description: raw.description,
    images: raw.images,
    likeCount: parseCountString(raw.likeCount),
    favoriteCount: parseCountString(raw.favoriteCount),
    commentCount: parseCountString(raw.commentCount),
    isLiked: raw.isLiked,
    isFavorited: raw.isFavorited,
    user: author,
    comments: raw.comments.map((comment) => normalizeComment(comment)),
  };

  // With neither title nor description the page did not render a post.
  if (!(detail.title || detail.description)) {
    throw new Error('waiting for selector: xhh detail not found');
  }

  log.info({ linkId, commentCount: detail.comments.length }, 'xhh feed detail extracted');
  return detail;
}
|
||||
|
||||
/**
 * Loads a post's detail page and returns the replies nested under one comment.
 *
 * The target comment is the first comment element whose id attribute equals
 * `commentId` or whose outer HTML contains `commentId`.
 *
 * @param page - Browser page used for navigation and DOM access.
 * @param linkId - Identifier of the post that holds the comment.
 * @param commentId - Identifier of the parent comment.
 * @param maxCount - Upper bound on the number of replies returned (default 200).
 * @returns Normalised replies; empty when the target comment is not found.
 * @throws Error starting with `CAPTCHA_REQUIRED` when captcha text is detected.
 */
export async function getSubComments(
  page: Page,
  linkId: string,
  commentId: string,
  maxCount = 200,
): Promise<Comment[]> {
  const url = buildDetailUrl(linkId);
  await page.goto(url, { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(1_000);

  const text = await page.textContent('body').catch(() => '');
  if (text && detectCaptchaText(text)) {
    throw new Error('CAPTCHA_REQUIRED: captcha detected on sub-comments page');
  }

  // Best-effort expansion of collapsed replies. The explicit timeout matters:
  // when no expand control exists, an unbounded click would block for the
  // default action timeout before failing, and that failure is swallowed anyway.
  // NOTE(review): this clicks the first expand control on the page, which may
  // not belong to the target comment — confirm whether it should be scoped.
  const expandSelector = XHH_SELECTORS.detail.commentExpandReplies;
  await page
    .locator(expandSelector)
    .first()
    .click({ timeout: 2_000 })
    .catch(() => {});
  await page.waitForTimeout(500);

  const subComments = await page.evaluate(
    ({ selectors, targetCommentId }) => {
      const commentSelector = selectors.detail.commentItem.join(', ');
      const subSelector = selectors.detail.subCommentItem.join(', ');

      const comments = [...document.querySelectorAll<HTMLElement>(commentSelector)];
      const target = comments.find((node) => {
        const id =
          node.getAttribute('data-comment-id') ||
          node.getAttribute('comment-id') ||
          node.id ||
          '';
        if (id === targetCommentId) return true;
        return node.outerHTML.includes(targetCommentId);
      });
      if (!target) return [] as RawComment[];

      const out: RawComment[] = [];
      for (const node of target.querySelectorAll<HTMLElement>(subSelector)) {
        const authorNode = node.querySelector(selectors.detail.commentAuthor);
        const authorLink = authorNode?.getAttribute('href') ?? '';
        out.push({
          id:
            node.getAttribute('data-comment-id') ||
            node.getAttribute('comment-id') ||
            node.id ||
            '',
          parentId: targetCommentId,
          userId: authorLink.match(/\/app\/user\/profile\/(\d+)/)?.[1] ?? '',
          nickname: (authorNode?.textContent ?? '').trim(),
          avatar: (node.querySelector(selectors.detail.commentAvatar) as HTMLImageElement | null)?.src ?? '',
          content: (node.querySelector(selectors.detail.commentContent)?.textContent ?? '').trim(),
          createTime: (node.querySelector(selectors.detail.commentTime)?.textContent ?? '').trim(),
          likeCount: (node.querySelector(selectors.detail.commentLikeCount)?.textContent ?? '').trim(),
          subComments: [],
        });
      }
      return out;
    },
    { selectors: XHH_SELECTORS, targetCommentId: commentId },
  );

  return subComments.slice(0, maxCount).map(normalizeComment);
}
|
||||
|
||||
/**
 * Converts a raw scraped comment, and recursively its replies, into a `Comment`.
 *
 * A comment without an id receives a generated one built from the current
 * time and a random number. The like counter is parsed into a number.
 *
 * @param input - Raw comment as read from the page.
 * @returns The normalised comment; `parentId` is present only when the raw value was non-empty.
 */
function normalizeComment(input: RawComment): Comment {
  const id = firstNonEmpty(input.id, `${Date.now()}-${Math.random()}`);
  const parentPart = input.parentId ? { parentId: input.parentId } : {};

  return {
    id,
    ...parentPart,
    userId: input.userId,
    nickname: input.nickname,
    avatar: input.avatar,
    content: input.content,
    likeCount: parseCountString(input.likeCount),
    createTime: input.createTime,
    subComments: input.subComments.map((child) => normalizeComment(child)),
  };
}
|
||||
@@ -0,0 +1,268 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '@social/core/utils/logger.js';
|
||||
import { XHH_SELECTORS } from './selectors.js';
|
||||
import type { Feed } from './types.js';
|
||||
import {
|
||||
detectCaptchaText,
|
||||
extractLinkIdFromUrl,
|
||||
firstNonEmpty,
|
||||
parseCountString,
|
||||
} from './extractors.js';
|
||||
|
||||
// Page that listFeeds navigates to before extracting feeds.
const HOME_URL = 'https://www.xiaoheihe.cn/app/bbs/home';
|
||||
// Child logger created with the binding `module: 'xhh-feeds'`.
const log = logger.child({ module: 'xhh-feeds' });
|
||||
|
||||
/**
 * Loosely-populated feed record produced by the extractors in this module.
 * Every field is optional because each source supplies only part of them.
 */
interface RawFeedCandidate {
  /** Post id, when the source exposes one directly. */
  id?: string;
  /** Post title. */
  title?: string;
  /** Post description or body excerpt. */
  description?: string;
  /** Cover image URL. */
  coverUrl?: string;
  /** Like counter as displayed (text) or already numeric. */
  likeCount?: number | string;
  /** Comment counter as displayed (text) or already numeric. */
  commentCount?: number | string;
  /** Author id, or the author's profile link when scraped from the DOM. */
  userId?: string;
  /** Author display name. */
  nickname?: string;
  /** Author avatar URL. */
  avatar?: string;
  /** Link to the post: absolute, site-relative, or scheme-less. */
  linkUrl?: string;
}
|
||||
|
||||
/**
 * Loads the home page and returns the feeds found on it.
 *
 * Candidates from the Nuxt payload come first, followed by candidates scraped
 * from the DOM; the combined list is then de-duplicated and normalised.
 *
 * @param page - Browser page used for navigation and extraction.
 * @returns De-duplicated, normalised feeds.
 * @throws Error starting with `CAPTCHA_REQUIRED` when captcha text is detected.
 */
export async function listFeeds(page: Page): Promise<Feed[]> {
  await page.goto(HOME_URL, { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(1_500);

  const bodyText = await page.textContent('body').catch(() => '');
  if (bodyText && detectCaptchaText(bodyText)) {
    throw new Error('CAPTCHA_REQUIRED: captcha detected on feeds page');
  }

  const fromNuxt = await extractFeedsFromNuxt(page);
  const fromDom = await extractFeedsFromDom(page);

  const feeds = dedupeAndNormalize(fromNuxt.concat(fromDom));
  log.info({ count: feeds.length }, 'xhh feeds extracted');
  return feeds;
}
|
||||
|
||||
/**
 * Searches feeds by keyword.
 *
 * Loads the search page and extracts feeds from it. If any were extracted,
 * they are filtered to those whose title, description or author nickname
 * contains the keyword (case-insensitive). If none were extracted, the home
 * feeds are loaded instead and filtered the same way.
 *
 * @param page - Browser page used for navigation and extraction.
 * @param keyword - Search term.
 * @returns Feeds matching the keyword.
 * @throws Error starting with `CAPTCHA_REQUIRED` when captcha text is detected.
 */
export async function searchFeeds(page: Page, keyword: string): Promise<Feed[]> {
  const matchesKeyword = (feed: Feed): boolean => {
    const searchable = `${feed.title} ${feed.description} ${feed.user.nickname}`.toLowerCase();
    return searchable.includes(keyword.toLowerCase());
  };

  const searchUrl = `https://www.xiaoheihe.cn/app/bbs/search?keyword=${encodeURIComponent(keyword)}`;
  await page.goto(searchUrl, { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(1_200);

  const bodyText = await page.textContent('body').catch(() => '');
  if (bodyText && detectCaptchaText(bodyText)) {
    throw new Error('CAPTCHA_REQUIRED: captcha detected on search page');
  }

  const fromNuxt = await extractFeedsFromNuxt(page);
  const fromDom = await extractFeedsFromDom(page);
  const found = dedupeAndNormalize([...fromNuxt, ...fromDom]);

  if (found.length > 0) {
    return found.filter(matchesKeyword);
  }

  // Fallback: when search route structure changes, use home feeds and filter.
  const homeFeeds = await listFeeds(page);
  return homeFeeds.filter(matchesKeyword);
}
|
||||
|
||||
/**
 * Collects feed candidates from the page's Nuxt payload.
 *
 * Inside the page, `window.__NUXT_DATA__` (or `window.__NUXT__.data`) is
 * walked depth-first. Every plain object that has a string id-like field, a
 * string title-like field, or a URL containing `/app/bbs/link/` is collected,
 * up to 500 objects. Each one is then mapped onto a `RawFeedCandidate` by
 * probing several alternative key names per field.
 *
 * @param page - Browser page whose payload is inspected.
 * @returns Raw candidates; empty when the in-page evaluation fails.
 */
async function extractFeedsFromNuxt(page: Page): Promise<RawFeedCandidate[]> {
  const payload = await page
    .evaluate(() => {
      const globals = window as { __NUXT_DATA__?: unknown; __NUXT__?: { data?: unknown } };
      const root: unknown = globals.__NUXT_DATA__ ?? globals.__NUXT__?.data ?? null;

      const found: Array<Record<string, unknown>> = [];
      const seen = new Set<unknown>();

      const stringField = (record: Record<string, unknown>, key: string): string => {
        const value = record[key];
        return typeof value === 'string' ? value : '';
      };

      const visit = (value: unknown): void => {
        if (typeof value !== 'object' || value === null) return;
        // The payload may contain shared or cyclic references; visit each object once.
        if (seen.has(value)) return;
        seen.add(value);

        if (Array.isArray(value)) {
          value.forEach((entry) => visit(entry));
          return;
        }

        const record = value as Record<string, unknown>;
        const id =
          stringField(record, 'link_id') ||
          stringField(record, 'linkid') ||
          stringField(record, 'id') ||
          stringField(record, 'post_id');
        const url = stringField(record, 'link_url') || stringField(record, 'url');
        const title = stringField(record, 'title') || stringField(record, 'subject');

        if (id || title || url.includes('/app/bbs/link/')) {
          found.push(record);
        }

        Object.values(record).forEach((child) => visit(child));
      };

      visit(root);
      return found.slice(0, 500);
    })
    .catch(() => []);

  return (payload as Array<Record<string, unknown>>).map((item) => {
    const author = (item['user'] ?? item['author']) as Record<string, unknown> | undefined;

    const fromItem = (...keys: string[]): string =>
      firstNonEmpty(...keys.map((key) => valueString(item[key])));
    const fromAuthor = (...keys: string[]): string =>
      firstNonEmpty(...keys.map((key) => valueString(author?.[key])));

    return {
      id: fromItem('link_id', 'linkid', 'post_id', 'id'),
      title: fromItem('title', 'subject'),
      description: fromItem('description', 'content', 'desc'),
      coverUrl: fromItem('cover', 'cover_url', 'image'),
      likeCount: valueString(item['like_count']) || valueString(item['likes']),
      commentCount: valueString(item['comment_count']) || valueString(item['comments']),
      userId: firstNonEmpty(fromAuthor('userid', 'user_id'), fromItem('userid')),
      nickname: firstNonEmpty(fromAuthor('nickname', 'name'), fromItem('nickname')),
      avatar: fromAuthor('avatar', 'avatar_url'),
      linkUrl: fromItem('link_url', 'url', 'jump_url'),
    };
  });
}
|
||||
|
||||
/**
 * Collects feed candidates by scraping post links from the rendered DOM.
 *
 * For every anchor matching `XHH_SELECTORS.feed.link` with a non-empty `href`,
 * the surrounding card (closest card-selector ancestor, else the anchor's
 * parent) is searched for title, description, cover, author and counters.
 *
 * @param page - Browser page to scrape.
 * @returns Raw candidates; empty when the in-page evaluation fails.
 */
async function extractFeedsFromDom(page: Page): Promise<RawFeedCandidate[]> {
  return page
    .evaluate((selectors) => {
      const feedSelectors = selectors.feed;
      const cardSelector = feedSelectors.card.join(', ');
      const titleSelector = feedSelectors.title.join(', ');
      const descSelector = feedSelectors.description.join(', ');
      const userNameSelector = feedSelectors.userName.join(', ');
      const likeSelector = feedSelectors.likeCount.join(', ');
      const commentSelector = feedSelectors.commentCount.join(', ');

      const textIn = (scope: Element | null, selector: string): string =>
        (scope?.querySelector(selector)?.textContent ?? '').trim();
      const imageSrcIn = (scope: Element | null, selector: string): string =>
        (scope?.querySelector(selector) as HTMLImageElement | null)?.src ?? '';

      const collected: RawFeedCandidate[] = [];

      for (const anchor of document.querySelectorAll<HTMLAnchorElement>(feedSelectors.link)) {
        const href = anchor.getAttribute('href');
        if (!href) continue;

        const card = anchor.closest(cardSelector) ?? anchor.parentElement;
        const authorAnchor = card?.querySelector(feedSelectors.userLink) as HTMLAnchorElement | null;

        collected.push({
          linkUrl: href,
          title: textIn(card, titleSelector),
          description: textIn(card, descSelector),
          coverUrl: imageSrcIn(card, feedSelectors.cover),
          // Holds the profile link here; the id is derived from it later.
          userId: authorAnchor?.getAttribute('href') ?? '',
          nickname: textIn(card, userNameSelector),
          // First image inside the card.
          avatar: imageSrcIn(card, 'img'),
          likeCount: textIn(card, likeSelector),
          commentCount: textIn(card, commentSelector),
        });
      }

      return collected;
    }, XHH_SELECTORS)
    .catch(() => []);
}
|
||||
|
||||
/**
 * Turns raw candidates into `Feed` objects, keeping one feed per post id.
 *
 * The post id is the candidate's own `id`, or failing that the id extracted
 * from its `linkUrl`. Candidates without an id are dropped, and the first
 * candidate seen for an id wins. Text fields are trimmed, counters are parsed
 * into numbers, and the link is made absolute.
 *
 * @param items - Candidates in priority order.
 * @returns Feeds in order of first appearance.
 */
function dedupeAndNormalize(items: RawFeedCandidate[]): Feed[] {
  // A Map keeps insertion order, so it serves as both the seen-set and the output list.
  const feedsById = new Map<string, Feed>();

  for (const candidate of items) {
    const idFromUrl = candidate.linkUrl ? extractLinkIdFromUrl(candidate.linkUrl) ?? '' : '';
    const linkId = firstNonEmpty(candidate.id, idFromUrl);
    if (linkId === '' || feedsById.has(linkId)) continue;

    const authorId = candidate.userId ? extractUserIdFromMaybeHref(candidate.userId) : '';

    feedsById.set(linkId, {
      id: linkId,
      title: candidate.title?.trim() ?? '',
      description: candidate.description?.trim() ?? '',
      coverUrl: candidate.coverUrl?.trim() ?? '',
      likeCount: parseCountString(candidate.likeCount),
      commentCount: parseCountString(candidate.commentCount),
      user: {
        id: authorId,
        nickname: candidate.nickname?.trim() ?? '',
        avatar: candidate.avatar?.trim() ?? '',
      },
      linkUrl: normalizeLinkUrl(candidate.linkUrl, linkId),
    });
  }

  return Array.from(feedsById.values());
}
|
||||
|
||||
/**
 * Produces an absolute https URL for a post link.
 *
 * - Empty/missing input: the canonical detail URL built from `linkId`.
 * - Already absolute (`http://` or `https://`, case-insensitive): returned unchanged.
 * - Protocol-relative (`//host/path`): prefixed with `https:`.
 * - Site-relative (`/path`): resolved against `https://www.xiaoheihe.cn`.
 * - Anything else: assumed to be a bare `host/path` and prefixed with `https://`.
 *
 * @param rawUrl URL or href as scraped; may be undefined or padded with whitespace.
 * @param linkId Link id used to build the fallback URL.
 * @returns An absolute URL string.
 */
function normalizeLinkUrl(rawUrl: string | undefined, linkId: string): string {
  const trimmed = rawUrl?.trim() ?? '';
  if (!trimmed) return `https://www.xiaoheihe.cn/app/bbs/link/${linkId}`;
  if (/^https?:\/\//i.test(trimmed)) return trimmed;
  // Must be checked before the single-slash case, otherwise `//host/x` would be
  // glued onto the site origin and produce `https://www.xiaoheihe.cn//host/x`.
  if (trimmed.startsWith('//')) return `https:${trimmed}`;
  if (trimmed.startsWith('/')) return `https://www.xiaoheihe.cn${trimmed}`;
  return `https://${trimmed}`;
}
|
||||
|
||||
/**
 * Pulls the numeric user id out of a profile href or URL.
 *
 * @param raw A site-relative href (`/app/user/profile/123`) or a full URL.
 * @returns The digits following `/app/user/profile/`, or `''` when the input
 *          does not contain such a path.
 */
function extractUserIdFromMaybeHref(raw: string): string {
  let candidate = raw;
  if (raw.startsWith('/')) {
    // Site-relative href: resolve against the site origin before matching.
    candidate = `https://www.xiaoheihe.cn${raw}`;
  }
  const found = /\/app\/user\/profile\/(\d+)/.exec(candidate);
  return found?.[1] ?? '';
}
|
||||
|
||||
/**
 * Coerces a loosely-typed value to a string.
 *
 * @param value Any value.
 * @returns The value itself for strings, its decimal form for numbers, and
 *          `''` for every other type (including null/undefined/objects).
 */
function valueString(value: unknown): string {
  switch (typeof value) {
    case 'string':
      return value;
    case 'number':
      return String(value);
    default:
      return '';
  }
}
|
||||
|
||||
@@ -0,0 +1,389 @@
|
||||
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
||||
import type { Router } from 'express';
|
||||
|
||||
import type { BrowserManager } from '@social/core/browser/manager.js';
|
||||
import { config } from '@social/core/config/index.js';
|
||||
import type { PlatformPlugin } from '@social/core/server/app.js';
|
||||
import { withErrorHandling, type McpToolResult } from '@social/core/utils/errors.js';
|
||||
import { computeIdempotencyHash, getIdempotencyStore } from '@social/core/utils/idempotency.js';
|
||||
import { deleteCookies, checkLoginStatus, getLoginQRCode } from './login.js';
|
||||
import { listFeeds } from './feeds.js';
|
||||
import { searchFeeds } from './search.js';
|
||||
import { getFeedDetail, getSubComments } from './feed-detail.js';
|
||||
import { getUserProfile } from './user-profile.js';
|
||||
import { listMyPosts } from './my-posts.js';
|
||||
import { postComment, replyComment } from './comment.js';
|
||||
import { setFavoriteState, setLikeState } from './interaction.js';
|
||||
import { resolveFeedTarget, resolveUserTarget } from './target-resolver.js';
|
||||
import {
|
||||
CheckLoginSchema,
|
||||
DeleteCookiesSchema,
|
||||
GetFeedDetailSchema,
|
||||
GetLoginQRCodeSchema,
|
||||
GetSubCommentsSchema,
|
||||
GetUserProfileSchema,
|
||||
ListFeedsSchema,
|
||||
ListMyPostsSchema,
|
||||
PostCommentSchema,
|
||||
ReplyCommentSchema,
|
||||
SearchSchema,
|
||||
SetFavoriteStateSchema,
|
||||
SetLikeStateSchema,
|
||||
} from './schemas.js';
|
||||
import { createXhhRoutes } from './routes.js';
|
||||
import { decodeKeysetCursor, paginateByKeyset } from './cursor.js';
|
||||
import type { Comment } from './types.js';
|
||||
|
||||
/** Platform key: used as the plugin name and passed to `browser.withPage`. */
const PLATFORM = 'xiaoheihe';

/** Page size applied when a tool call does not supply `max_count`. */
const DEFAULT_PAGE_SIZE = 20;

/** Largest page size a tool call may request; also the sub-comment load cap. */
const MAX_PAGE_SIZE = 200;

/** Free-form metadata object attached to a tool response envelope. */
type McpMeta = { [key: string]: unknown };
|
||||
|
||||
/**
 * Wraps a successful tool result in the MCP text-content envelope.
 *
 * The single text item carries the JSON-serialized object
 * `{ success: true, data, meta }`, with `meta` defaulting to `{}`.
 *
 * @param data Payload to return to the caller.
 * @param meta Optional metadata (pagination, idempotency info, ...).
 * @returns An MCP tool result with exactly one `text` content item.
 */
function ok(data: unknown, meta?: McpMeta): McpToolResult {
  const envelope = { success: true, data, meta: meta ?? {} };
  return {
    content: [{ type: 'text', text: JSON.stringify(envelope) }],
  };
}
|
||||
|
||||
/**
 * Resolves the effective page size for a paginated tool call.
 *
 * @param maxCount Requested size; `DEFAULT_PAGE_SIZE` is used when omitted.
 * @returns The requested size limited to the range `[1, MAX_PAGE_SIZE]`.
 */
function clampPageSize(maxCount?: number): number {
  const requested = maxCount ?? DEFAULT_PAGE_SIZE;
  const atLeastOne = Math.max(1, requested);
  return Math.min(MAX_PAGE_SIZE, atLeastOne);
}
|
||||
|
||||
/**
 * Builds the `pagination` metadata section of a paginated tool response.
 *
 * @param cursor Cursor the caller sent; reported as `''` when absent.
 * @param maxCount Effective page size that was applied.
 * @param returned Number of items in this page.
 * @param nextCursor Cursor for the following page; `next_cursor` is only
 *                   included when this is a non-empty string.
 * @returns `{ pagination: { mode: 'keyset', cursor, max_count, returned, next_cursor? } }`.
 */
function paginationMeta(
  cursor: string | undefined,
  maxCount: number,
  returned: number,
  nextCursor?: string,
): McpMeta {
  const pagination: Record<string, unknown> = {
    mode: 'keyset',
    cursor: cursor ?? '',
    max_count: maxCount,
    returned,
  };
  if (nextCursor) {
    pagination['next_cursor'] = nextCursor;
  }
  return { pagination };
}
|
||||
|
||||
/**
 * Runs `execute`, optionally de-duplicated by a caller-supplied request id.
 *
 * - Without a `requestId` the operation simply runs and only `data` is returned.
 * - With a `requestId` already recorded for `toolName` and the same input hash,
 *   the stored response is returned and `execute` is not called.
 * - With a `requestId` recorded under a different input hash, an error is thrown.
 * - Otherwise the operation runs and its result is stored under the request id.
 *
 * NOTE(review): the store is only written after `execute` resolves, so two
 * overlapping calls with the same request id appear able to both execute;
 * confirm whether callers can issue concurrent retries.
 *
 * @param toolName Namespace for the request id.
 * @param requestId Caller-chosen idempotency key; falsy disables the mechanism.
 * @param inputForHash Value hashed to detect request-id reuse with other parameters.
 * @param execute The side-effecting operation.
 * @returns The result plus idempotency metadata when a request id was given.
 * @throws Error when `requestId` was previously used with different parameters.
 */
async function runWithIdempotency<T>(
  toolName: string,
  requestId: string | undefined,
  inputForHash: unknown,
  execute: () => Promise<T>,
): Promise<{ data: T; meta?: McpMeta }> {
  if (requestId) {
    const store = getIdempotencyStore();
    const inputHash = computeIdempotencyHash(inputForHash);
    const previous = store.get(toolName, requestId);

    if (!previous) {
      // First time this request id is seen: execute, then record the outcome.
      const fresh = await execute();
      store.put(toolName, requestId, inputHash, fresh);
      return {
        data: fresh,
        meta: { request_id: requestId, idempotent_replay: false },
      };
    }

    if (previous.inputHash !== inputHash) {
      throw new Error('request_id already used with different parameters');
    }

    return {
      data: previous.responseData as T,
      meta: {
        request_id: requestId,
        idempotent_replay: true,
        first_processed_at: previous.createdAt,
      },
    };
  }

  return { data: await execute() };
}
|
||||
|
||||
/**
 * Sort comparator for comments: ascending by `createTime`, ties broken by `id`.
 * Both fields are compared as strings via `localeCompare`.
 *
 * @returns Negative, zero, or positive per the `Array.prototype.sort` contract.
 */
function compareCommentKey(a: Comment, b: Comment): number {
  // `||` falls through to the id comparison only when the times compare equal (0).
  return a.createTime.localeCompare(b.createTime) || a.id.localeCompare(b.id);
}
|
||||
|
||||
/**
 * Looks up the browser-operation timeout (milliseconds) for `operation` in
 * `config.operationTimeouts`, falling back to the `default` entry and finally
 * to 60 seconds.
 */
function operationTimeout(operation: string): number {
  return config.operationTimeouts[operation] ?? config.operationTimeouts['default'] ?? 60_000;
}

/**
 * Applies keyset pagination to an already-loaded list and wraps the page in a
 * success envelope with pagination metadata.
 *
 * @param items Full list to page through.
 * @param args Tool arguments carrying the optional `max_count` and `cursor`.
 * @param keyOf Produces the keyset key for an item.
 */
function okPaginated<T>(
  items: T[],
  args: { max_count?: number | undefined; cursor?: string | undefined },
  keyOf: (item: T) => string,
): McpToolResult {
  const limit = clampPageSize(args.max_count);
  const cursor = decodeKeysetCursor(args.cursor);
  const paged = paginateByKeyset(items, limit, cursor, keyOf);
  return ok(
    paged.items,
    paginationMeta(args.cursor, limit, paged.items.length, paged.nextCursor),
  );
}

/**
 * Xiaoheihe platform plugin: mounts the REST routes and registers the
 * `xhh_*` MCP tools. Every tool body runs inside `withErrorHandling`, and
 * browser work runs through `browser.withPage` with a per-operation timeout.
 */
export const xiaoheihePlugin: PlatformPlugin = {
  name: PLATFORM,
  apiNamespace: 'xhh',

  /** Mounts the Xiaoheihe REST router at the root of the supplied router. */
  registerRoutes(router: Router, browser: BrowserManager): void {
    router.use('/', createXhhRoutes(browser));
  },

  /** Registers all Xiaoheihe MCP tools on `server`. */
  registerTools(server: McpServer, browser: BrowserManager): void {
    server.tool(
      'xhh_check_login',
      'Check Xiaoheihe login status',
      CheckLoginSchema,
      async () => withErrorHandling('xhh_check_login', async () => {
        const timeoutMs = operationTimeout('login');
        const status = await browser.withPage(PLATFORM, async (page) => checkLoginStatus(page), timeoutMs);
        // Optional fields are omitted entirely rather than sent as empty values.
        return ok({
          logged_in: status.loggedIn,
          ...(status.username ? { username: status.username } : {}),
          ...(status.avatar ? { avatar: status.avatar } : {}),
          ...(status.userId ? { user_id: status.userId } : {}),
        });
      }),
    );

    server.tool(
      'xhh_get_login_qrcode',
      'Get Xiaoheihe login QR code',
      GetLoginQRCodeSchema,
      async () => withErrorHandling('xhh_get_login_qrcode', async () => {
        const qr = await getLoginQRCode(browser);
        return ok({
          qrcode_data: qr.qrcodeData,
          already_logged_in: qr.alreadyLoggedIn,
          timeout: qr.timeout,
        });
      }),
    );

    server.tool(
      'xhh_delete_cookies',
      'Delete Xiaoheihe cookies and reset login session',
      DeleteCookiesSchema,
      async () => withErrorHandling('xhh_delete_cookies', async () => {
        await deleteCookies(browser);
        return ok({ deleted: true });
      }),
    );

    server.tool(
      'xhh_list_feeds',
      'List Xiaoheihe feed cards',
      ListFeedsSchema,
      async (args) => withErrorHandling('xhh_list_feeds', async () => {
        const timeoutMs = operationTimeout('feed_list');
        const feeds = await browser.withPage(PLATFORM, async (page) => listFeeds(page), timeoutMs);
        return okPaginated(feeds, args, (item) => item.id);
      }),
    );

    server.tool(
      'xhh_search',
      'Search Xiaoheihe posts by keyword',
      SearchSchema,
      async (args) => withErrorHandling('xhh_search', async () => {
        const timeoutMs = operationTimeout('search');
        const feeds = await browser.withPage(
          PLATFORM,
          async (page) => searchFeeds(page, args.keyword),
          timeoutMs,
        );
        return okPaginated(feeds, args, (item) => item.id);
      }),
    );

    server.tool(
      'xhh_get_feed_detail',
      'Get Xiaoheihe feed detail with first-screen comments',
      GetFeedDetailSchema,
      async (args) => withErrorHandling('xhh_get_feed_detail', async () => {
        const timeoutMs = operationTimeout('feed_detail');
        const target = resolveFeedTarget({
          link_id: args.link_id,
          url: args.url,
        });
        const detail = await browser.withPage(
          PLATFORM,
          async (page) => getFeedDetail(page, target.linkId),
          timeoutMs,
        );
        // Comments are reported separately from the post fields.
        const { comments, ...rest } = detail;
        return ok({
          detail: rest,
          first_screen_comments: comments,
        });
      }),
    );

    server.tool(
      'xhh_get_sub_comments',
      'Get sub-comments for a Xiaoheihe comment with keyset pagination',
      GetSubCommentsSchema,
      async (args) => withErrorHandling('xhh_get_sub_comments', async () => {
        const timeoutMs = operationTimeout('feed_detail');
        const loaded = await browser.withPage(
          PLATFORM,
          async (page) => getSubComments(page, args.link_id, args.comment_id, MAX_PAGE_SIZE),
          timeoutMs,
        );
        // Sort a copy so the loaded array is left untouched.
        const sorted = [...loaded].sort(compareCommentKey);
        return okPaginated(sorted, args, (item) => `${item.createTime}|${item.id}`);
      }),
    );

    server.tool(
      'xhh_get_user_profile',
      'Get Xiaoheihe user profile',
      GetUserProfileSchema,
      async (args) => withErrorHandling('xhh_get_user_profile', async () => {
        const timeoutMs = operationTimeout('user_profile');
        const target = resolveUserTarget({
          user_id: args.user_id,
          url: args.url,
        });
        const profile = await browser.withPage(
          PLATFORM,
          async (page) => getUserProfile(page, target.userId),
          timeoutMs,
        );
        return ok({
          profile: {
            id: profile.id,
            nickname: profile.nickname,
            avatar: profile.avatar,
            description: profile.description,
            follows: profile.follows,
            fans: profile.fans,
            likes: profile.likes,
          },
          recent_posts: profile.posts,
        });
      }),
    );

    server.tool(
      'xhh_list_my_posts',
      'List my Xiaoheihe posts',
      ListMyPostsSchema,
      async (args) => withErrorHandling('xhh_list_my_posts', async () => {
        const timeoutMs = operationTimeout('feed_list');
        const posts = await browser.withPage(
          PLATFORM,
          async (page) => listMyPosts(page, args.type),
          timeoutMs,
        );
        return okPaginated(
          posts,
          args,
          (item) => `${item.modifyTime ?? item.createTime ?? ''}|${item.id}`,
        );
      }),
    );

    server.tool(
      'xhh_post_comment',
      'Post a comment on Xiaoheihe',
      PostCommentSchema,
      async (args) => withErrorHandling('xhh_post_comment', async () => {
        const timeoutMs = operationTimeout('comment');
        const result = await runWithIdempotency(
          'xhh_post_comment',
          args.request_id,
          {
            link_id: args.link_id,
            content: args.content,
          },
          async () => browser.withPage(
            PLATFORM,
            async (page) => postComment(page, args.link_id, args.content),
            timeoutMs,
          ),
        );
        return ok(result.data, result.meta);
      }),
    );

    server.tool(
      'xhh_reply_comment',
      'Reply a comment on Xiaoheihe',
      ReplyCommentSchema,
      async (args) => withErrorHandling('xhh_reply_comment', async () => {
        const timeoutMs = operationTimeout('reply');
        const result = await runWithIdempotency(
          'xhh_reply_comment',
          args.request_id,
          {
            link_id: args.link_id,
            comment_id: args.comment_id,
            content: args.content,
          },
          async () => browser.withPage(
            PLATFORM,
            async (page) => replyComment(page, args.link_id, args.comment_id, args.content),
            timeoutMs,
          ),
        );
        return ok(result.data, result.meta);
      }),
    );

    server.tool(
      'xhh_set_like_state',
      'Set like state for a Xiaoheihe post',
      SetLikeStateSchema,
      async (args) => withErrorHandling('xhh_set_like_state', async () => {
        const timeoutMs = operationTimeout('like');
        const result = await browser.withPage(
          PLATFORM,
          async (page) => setLikeState(page, args.link_id, args.liked),
          timeoutMs,
        );
        return ok(result);
      }),
    );

    server.tool(
      'xhh_set_favorite_state',
      'Set favorite state for a Xiaoheihe post',
      SetFavoriteStateSchema,
      async (args) => withErrorHandling('xhh_set_favorite_state', async () => {
        const timeoutMs = operationTimeout('favorite');
        const result = await browser.withPage(
          PLATFORM,
          async (page) => setFavoriteState(page, args.link_id, args.favorited),
          timeoutMs,
        );
        return ok(result);
      }),
    );
  },
};
|
||||
|
||||
@@ -0,0 +1,98 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { XHH_SELECTORS } from './selectors.js';
|
||||
import { detectCaptchaText } from './extractors.js';
|
||||
|
||||
/**
 * Builds the post detail page URL for a link id.
 *
 * @param linkId Post link id; percent-encoded before being placed in the path.
 * @returns Absolute detail-page URL.
 */
function buildDetailUrl(linkId: string): string {
  const encodedId = encodeURIComponent(linkId);
  return 'https://www.xiaoheihe.cn/app/bbs/link/' + encodedId;
}
|
||||
|
||||
/** Outcome of a like/favorite toggle attempt. */
type ToggleOutcome = { success: boolean; state: boolean; changed: boolean };

/**
 * Shared implementation for the like and favorite toggles.
 *
 * Opens the post detail page, aborts if captcha text is detected, reads the
 * button's current state and clicks it only when that state differs from
 * `targetState`, then re-reads the state to report what actually happened.
 *
 * @param buttonSelectors Candidate selectors for the toggle button.
 * @throws Error prefixed with `CAPTCHA_REQUIRED` when a captcha is detected.
 */
async function setToggleState(
  page: Page,
  linkId: string,
  buttonSelectors: readonly string[],
  targetState: boolean,
): Promise<ToggleOutcome> {
  await page.goto(buildDetailUrl(linkId), { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(1_000);

  const text = await page.textContent('body').catch(() => '');
  if (text && detectCaptchaText(text)) {
    throw new Error('CAPTCHA_REQUIRED: captcha detected on interaction page');
  }

  const current = await readButtonState(page, buttonSelectors);
  if (current === targetState) {
    // Already in the requested state: nothing to click.
    return { success: true, state: current, changed: false };
  }

  const clicked = await clickAny(page, buttonSelectors);
  if (!clicked) {
    return { success: false, state: current, changed: false };
  }

  // Give the page a moment to reflect the click before re-reading the state.
  await page.waitForTimeout(700);
  const state = await readButtonState(page, buttonSelectors);
  return {
    success: state === targetState,
    state,
    changed: state !== current,
  };
}

/**
 * Sets the like state of a post.
 *
 * @param page Browser page to drive.
 * @param linkId Post link id.
 * @param targetState `true` to like, `false` to remove the like.
 * @returns `success` is true when the final observed state equals `targetState`;
 *          `state` is the final observed state; `changed` tells whether it differs
 *          from the state observed before acting.
 * @throws Error prefixed with `CAPTCHA_REQUIRED` when a captcha is detected.
 */
export async function setLikeState(
  page: Page,
  linkId: string,
  targetState: boolean,
): Promise<{ success: boolean; state: boolean; changed: boolean }> {
  return setToggleState(page, linkId, XHH_SELECTORS.detail.likeButton, targetState);
}

/**
 * Sets the favorite state of a post.
 *
 * @param page Browser page to drive.
 * @param linkId Post link id.
 * @param targetState `true` to favorite, `false` to remove the favorite.
 * @returns Same shape and meaning as {@link setLikeState}.
 * @throws Error prefixed with `CAPTCHA_REQUIRED` when a captcha is detected.
 */
export async function setFavoriteState(
  page: Page,
  linkId: string,
  targetState: boolean,
): Promise<{ success: boolean; state: boolean; changed: boolean }> {
  return setToggleState(page, linkId, XHH_SELECTORS.detail.favoriteButton, targetState);
}
|
||||
|
||||
/**
 * Clicks the first element that can be clicked among the candidate selectors.
 *
 * Selectors are tried in order; each attempt targets the first match and is
 * given up to 2 seconds. A failed attempt moves on to the next selector.
 *
 * @returns `true` as soon as one click succeeds, `false` if every attempt fails.
 */
async function clickAny(page: Page, selectors: readonly string[]): Promise<boolean> {
  for (const candidate of selectors) {
    const target = page.locator(candidate).first();
    try {
      await target.click({ timeout: 2_000 });
      return true;
    } catch {
      // Not clickable through this selector; try the next one.
    }
  }
  return false;
}
|
||||
|
||||
/**
 * Heuristically reads whether a toggle button (like/favorite) is currently ON.
 *
 * Selectors are tried in order and the first one that matches an element
 * decides the result. An element counts as ON when any of these holds:
 *  - `aria-pressed="true"`;
 *  - its class attribute contains `active` or `selected`;
 *  - its inner HTML contains `filled` or `checked`.
 * Negated forms (`inactive`, `interactive`, `unselected`, `deselected`,
 * `unfilled`, `unchecked`) are excluded so they are not mistaken for ON.
 *
 * @returns The detected state, or `false` when no selector matches or the
 *          in-page evaluation fails for every selector.
 */
async function readButtonState(page: Page, selectors: readonly string[]): Promise<boolean> {
  for (const selector of selectors) {
    const state = await page
      .evaluate((sel) => {
        const node = document.querySelector(sel) as HTMLElement | null;
        if (!node) return null;
        if (node.getAttribute('aria-pressed') === 'true') return true;
        // Lookbehinds reject the keyword when it is the tail of a negated word.
        const cls = node.className.toString().toLowerCase();
        if (/(?<!in|inter)active|(?<!un|de)selected/.test(cls)) return true;
        const html = node.innerHTML.toLowerCase();
        if (/(?<!un)filled|(?<!un)checked/.test(html)) return true;
        return false;
      }, selector)
      .catch(() => null);
    // null means "no element / evaluation failed": fall through to the next selector.
    if (typeof state === 'boolean') return state;
  }
  return false;
}
|
||||
|
||||
@@ -0,0 +1,169 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import type { BrowserManager } from '@social/core/browser/manager.js';
|
||||
import { cookieStore } from '@social/core/cookie/store.js';
|
||||
import { logger } from '@social/core/utils/logger.js';
|
||||
import { XHH_SELECTORS } from './selectors.js';
|
||||
import type { LoginStatus, QRCodeResult } from './types.js';
|
||||
import { extractUserIdFromUrl, firstNonEmpty } from './extractors.js';
|
||||
|
||||
/** Platform key passed to the browser manager and the cookie store. */
const PLATFORM = 'xiaoheihe';

/** Page loaded to probe login state and to start the QR login flow. */
const HOME_URL = 'https://www.xiaoheihe.cn/app/bbs/home';

/** How long the background waiter waits for a logged-in indicator (4 minutes). */
const QR_SCAN_TIMEOUT_MS = 4 * 60 * 1_000;

/** Delay after which the QR-login page is released unconditionally (5 minutes). */
const LOGIN_SAFETY_TIMEOUT_MS = 5 * 60 * 1_000;

/** Module-scoped logger. */
const log = logger.child({ module: 'xhh-login' });
|
||||
|
||||
/**
 * Navigates to the home page and reports whether the session is logged in.
 *
 * The session counts as logged in when one of the configured logged-in
 * indicator selectors appears within 4 seconds. When it does, username,
 * avatar and user id are read best-effort; each is omitted from the result
 * when it cannot be determined.
 *
 * @param page Browser page to drive; it is navigated away from its current URL.
 * @returns Login status with optional profile details.
 */
export async function checkLoginStatus(page: Page): Promise<LoginStatus> {
  await page.goto(HOME_URL, { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(1_200);

  const loginSelectors = XHH_SELECTORS.login;
  const indicator = await waitFirstSelector(page, loginSelectors.loggedInIndicators, 4_000);
  if (!indicator) {
    return { loggedIn: false };
  }

  // Prefer the dedicated username element; fall back to the indicator's own text.
  const dedicatedName = await textFromSelector(page, loginSelectors.username);
  const indicatorText = await indicator.textContent().catch(() => '');
  const username = firstNonEmpty(dedicatedName, indicatorText);

  const avatar = await attrFromSelector(page, loginSelectors.avatar, 'src');
  const profileHref = await attrFromSelector(page, loginSelectors.userLink, 'href');
  const userId = profileHref ? extractUserIdFromUrl(profileHref) : undefined;

  const usernamePart = username ? { username } : {};
  const avatarPart = avatar ? { avatar } : {};
  const userIdPart = userId ? { userId } : {};
  return { loggedIn: true, ...usernamePart, ...avatarPart, ...userIdPart };
}
|
||||
|
||||
/**
 * Starts the QR-code login flow and returns the QR image data.
 *
 * A page is acquired from the browser manager. If the session is already
 * logged in the page is released immediately. Otherwise the login dialog is
 * opened, the QR code extracted, and a background task keeps the page until
 * login completes or its wait times out. A safety timer releases the page
 * after `LOGIN_SAFETY_TIMEOUT_MS` regardless.
 *
 * The page is released exactly once whichever path finishes first: early
 * return, failure, background waiter, or safety timer.
 *
 * @param browser Browser manager used to acquire the page.
 * @returns QR data (image URL or data URL), or `alreadyLoggedIn: true` with empty data.
 * @throws Error when no QR code can be found, or when navigation/extraction fails.
 */
export async function getLoginQRCode(browser: BrowserManager): Promise<QRCodeResult> {
  const { page, release } = await browser.acquirePage(PLATFORM);

  let released = false;
  let safetyTimer: ReturnType<typeof setTimeout> | undefined;

  // Idempotent release: several code paths may try to release the same page.
  const releaseOnce = async (): Promise<void> => {
    if (released) return;
    released = true;
    if (safetyTimer !== undefined) clearTimeout(safetyTimer);
    await release();
  };

  safetyTimer = setTimeout(() => {
    releaseOnce().catch((err: unknown) => {
      log.warn({ err }, 'safety-timer page release failed');
    });
  }, LOGIN_SAFETY_TIMEOUT_MS);
  // Do not let the safety timer alone keep the process alive.
  if (typeof safetyTimer === 'object' && 'unref' in safetyTimer) {
    safetyTimer.unref();
  }

  try {
    // NOTE(review): checkLoginStatus navigates to HOME_URL again, so this first
    // navigation looks redundant; kept to preserve the existing page sequence.
    await page.goto(HOME_URL, { waitUntil: 'domcontentloaded' });
    await page.waitForTimeout(1_200);

    const status = await checkLoginStatus(page);
    if (status.loggedIn) {
      await releaseOnce();
      return {
        qrcodeData: '',
        alreadyLoggedIn: true,
        timeout: '0',
      };
    }

    // Open the login dialog if a login button is present (best-effort).
    const loginBtn = await page.$(XHH_SELECTORS.login.loginButton).catch(() => null);
    if (loginBtn) {
      await loginBtn.click().catch(() => {});
      await page.waitForTimeout(500);
    }

    const qrcodeData = await extractQrCodeData(page);
    if (!qrcodeData) {
      // The catch block below performs the release.
      throw new Error('waiting for selector: xhh login qrcode');
    }

    // Fire-and-forget: the waiter owns the page from here and releases it when done.
    waitForLoginAndRelease(page, browser, releaseOnce).catch((err: unknown) => {
      log.warn({ err }, 'background login wait failed');
    });

    return {
      qrcodeData,
      alreadyLoggedIn: false,
      timeout: '4m',
    };
  } catch (err) {
    try {
      await releaseOnce();
    } catch (releaseErr) {
      // Keep the original failure as the error reported to the caller.
      log.warn({ err: releaseErr }, 'page release failed after login qrcode error');
    }
    throw err;
  }
}
|
||||
|
||||
/**
 * Logs the platform session out: first deletes the stored cookies for the
 * platform, then clears its browser context.
 *
 * @param browser Browser manager owning the platform's context.
 */
export async function deleteCookies(browser: BrowserManager): Promise<void> {
  const platform = PLATFORM;
  await cookieStore.delete(platform);
  await browser.clearContext(platform);
}
|
||||
|
||||
/**
 * Background half of the QR login flow: waits for a logged-in indicator, then
 * saves the session cookies and clears the context. The page is always
 * released at the end, whether login succeeded, timed out, or failed.
 *
 * When no indicator appears within `QR_SCAN_TIMEOUT_MS` nothing is saved or
 * cleared: `waitFirstSelector` signals a timeout by returning null rather
 * than throwing, so the result must be checked explicitly.
 *
 * @param page Page showing the login QR code.
 * @param browser Browser manager used to save cookies and clear the context.
 * @param release Callback that returns the page to the manager.
 */
async function waitForLoginAndRelease(
  page: Page,
  browser: BrowserManager,
  release: () => Promise<void>,
): Promise<void> {
  try {
    const indicator = await waitFirstSelector(
      page,
      XHH_SELECTORS.login.loggedInIndicators,
      QR_SCAN_TIMEOUT_MS,
    );
    if (!indicator) {
      log.warn('xhh login not completed before QR timeout; cookies not saved');
      return;
    }
    await browser.saveCookies(PLATFORM);
    await browser.clearContext(PLATFORM);
  } finally {
    await release();
  }
}
|
||||
|
||||
/**
 * Waits until any one of `selectors` matches and returns its element handle.
 *
 * All selectors are awaited concurrently and share the same `timeout`, so a
 * later selector can match while an earlier one is still pending. Waiting for
 * them one after another would let the first selector use up the whole budget.
 *
 * @param page Page to watch.
 * @param selectors Candidate selectors; an empty list resolves to null at once.
 * @param timeout Maximum wait in milliseconds (values below 1 are raised to 1).
 * @returns The handle of whichever selector matches first, or null when none
 *          matches in time. A failed wait counts as "no match" and never rejects.
 */
async function waitFirstSelector(
  page: Page,
  selectors: readonly string[],
  timeout: number,
) {
  if (selectors.length === 0) return null;

  const budget = Math.max(1, timeout);
  const attempts = selectors.map((selector) =>
    page.waitForSelector(selector, { timeout: budget }).catch(() => null),
  );

  // Resolve with the first non-null handle, or with null once every attempt has settled.
  const firstHit = <T>(tasks: Promise<T | null>[]): Promise<T | null> =>
    new Promise((resolve) => {
      let pending = tasks.length;
      for (const task of tasks) {
        void task.then((handle) => {
          pending -= 1;
          if (handle) resolve(handle);
          else if (pending === 0) resolve(null);
        });
      }
    });

  return firstHit(attempts);
}
|
||||
|
||||
/**
 * Reads the login QR code from the page.
 *
 * The configured QR selectors are tried in order. For an `<img>` the `src` is
 * used; for a `<canvas>` its data URL is used. Any other element, a missing
 * element, or an evaluation failure yields nothing for that selector.
 *
 * @returns The first non-empty QR source found, or `''` when none is found.
 */
async function extractQrCodeData(page: Page): Promise<string> {
  for (const candidate of XHH_SELECTORS.login.qrCodeImage) {
    const source = await page
      .evaluate((sel) => {
        const el = document.querySelector(sel);
        if (el instanceof HTMLImageElement) {
          return el.src || '';
        }
        if (el instanceof HTMLCanvasElement) {
          let dataUrl = '';
          try {
            dataUrl = el.toDataURL();
          } catch {
            // Reading the canvas failed; treat as "no QR here".
            dataUrl = '';
          }
          return dataUrl;
        }
        // Covers both "no element" (null) and unsupported element types.
        return '';
      }, candidate)
      .catch(() => '');
    if (source) return source;
  }
  return '';
}
|
||||
|
||||
/**
 * Reads the trimmed text content of the first element matching `selector`.
 *
 * @returns The trimmed text, or `''` when the element has no text, does not
 *          exist, or the lookup fails.
 */
async function textFromSelector(page: Page, selector: string): Promise<string> {
  try {
    return await page.$eval(selector, (el) => (el.textContent ?? '').trim());
  } catch {
    // Missing element or evaluation failure: best-effort, report nothing.
    return '';
  }
}
|
||||
|
||||
/**
 * Reads attribute `attr` from the first element matching `selector`.
 *
 * @returns The attribute value, or `''` when the attribute is absent, the
 *          element does not exist, or the lookup fails.
 */
async function attrFromSelector(
  page: Page,
  selector: string,
  attr: string,
): Promise<string> {
  try {
    return await page.$eval(
      selector,
      (el, attrName) => el.getAttribute(attrName) ?? '',
      attr,
    );
  } catch {
    // Missing element or evaluation failure: best-effort, report nothing.
    return '';
  }
}
|
||||
|
||||
@@ -0,0 +1,88 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '@social/core/utils/logger.js';
|
||||
import { XHH_SELECTORS } from './selectors.js';
|
||||
import type { MyPost, MyPostType } from './types.js';
|
||||
import { detectCaptchaText, extractLinkIdFromUrl, parseCountString } from './extractors.js';
|
||||
|
||||
/**
 * Creator-center page that lists the account's own posts.
 * NOTE(review): this name shadows the global `URL` class within the module.
 */
const URL = 'https://www.xiaoheihe.cn/creator/content_management/home';

/** Module-scoped logger. */
const log = logger.child({ module: 'xhh-my-posts' });

/** Text that a tab button's label must contain to be picked for each post type. */
const TAB_KEYWORDS: Record<MyPostType, string> = {
  all: '全部',
  article: '文章',
  image_text: '图文',
  video: '视频',
};
|
||||
|
||||
/**
 * Clicks the creator-center tab whose label contains the keyword for `type`.
 *
 * @returns `true` when a matching tab was found and clicked, `false` when no
 *          tab matched or the click failed.
 */
async function selectMyPostsTab(page: Page, type: MyPostType): Promise<boolean> {
  const keyword = TAB_KEYWORDS[type];
  const tabs = page.locator(XHH_SELECTORS.myPosts.tabButton);
  const count = await tabs.count().catch(() => 0);
  for (let i = 0; i < count; i += 1) {
    const tab = tabs.nth(i);
    const tabText = (await tab.textContent().catch(() => '')) ?? '';
    if (!tabText.includes(keyword)) continue;
    const clicked = await tab.click().then(() => true).catch(() => false);
    // Let the list re-render for the newly selected tab.
    await page.waitForTimeout(500);
    return clicked;
  }
  return false;
}

/** Resolves a scraped post href against the site origin. */
function toAbsolutePostUrl(href: string): string {
  if (href.startsWith('http')) return href;
  if (href.startsWith('//')) return `https:${href}`;
  // Insert the separating slash when the href is relative without a leading one.
  return `https://www.xiaoheihe.cn${href.startsWith('/') ? '' : '/'}${href}`;
}

/**
 * Lists the logged-in account's own posts from the creator-center page.
 *
 * For a `type` other than `'all'` the matching tab is selected first. If that
 * tab cannot be selected a warning is logged and the posts of the currently
 * shown tab are returned, still labelled with the requested `type`. Items
 * whose link does not yield a link id are skipped.
 *
 * @param page Browser page to drive; it is navigated to the creator center.
 * @param type Post category to list; defaults to `'all'`.
 * @returns Posts in page order. `createTime` and `modifyTime` both carry the
 *          single time string shown on the item; `user` fields are empty.
 * @throws Error prefixed with `CAPTCHA_REQUIRED` when a captcha is detected.
 */
export async function listMyPosts(
  page: Page,
  type: MyPostType = 'all',
): Promise<MyPost[]> {
  await page.goto(URL, { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(1_200);

  const text = await page.textContent('body').catch(() => '');
  if (text && detectCaptchaText(text)) {
    throw new Error('CAPTCHA_REQUIRED: captcha detected on my-posts page');
  }

  if (type !== 'all') {
    const selected = await selectMyPostsTab(page, type);
    if (!selected) {
      log.warn({ type }, 'xhh my-posts tab not selected; listing the currently shown tab');
    }
  }

  const rawItems = await page.evaluate((selectors) => {
    const container = [...document.querySelectorAll<HTMLElement>(selectors.myPosts.postItem)];
    return container.map((node) => {
      const linkNode = node.querySelector<HTMLAnchorElement>(selectors.myPosts.postLink);
      const href = linkNode?.getAttribute('href') ?? '';
      const title = (node.querySelector(selectors.myPosts.title)?.textContent ?? '').trim();
      const description = (node.querySelector(selectors.myPosts.description)?.textContent ?? '').trim();
      const time = (node.querySelector(selectors.myPosts.time)?.textContent ?? '').trim();
      const likeRaw = (node.querySelector('.like-count, .content-list__like-cnt')?.textContent ?? '').trim();
      const commentRaw = (node.querySelector('.comment-count, .content-list__comment-cnt')?.textContent ?? '').trim();
      const cover = (node.querySelector('img') as HTMLImageElement | null)?.src ?? '';
      return { href, title, description, time, likeRaw, commentRaw, cover };
    });
  }, XHH_SELECTORS);

  const posts: MyPost[] = [];
  for (const item of rawItems) {
    const linkId = extractLinkIdFromUrl(item.href);
    if (!linkId) continue;
    posts.push({
      id: linkId,
      type,
      title: item.title,
      description: item.description,
      coverUrl: item.cover,
      likeCount: parseCountString(item.likeRaw),
      commentCount: parseCountString(item.commentRaw),
      user: {
        id: '',
        nickname: '',
        avatar: '',
      },
      linkUrl: toAbsolutePostUrl(item.href),
      createTime: item.time,
      modifyTime: item.time,
    });
  }

  log.info({ type, count: posts.length }, 'xhh my posts listed');
  return posts;
}
|
||||
@@ -0,0 +1,464 @@
|
||||
import { Router } from 'express';
|
||||
import { z, ZodError } from 'zod';
|
||||
|
||||
import type { BrowserManager } from '@social/core/browser/manager.js';
|
||||
import { config } from '@social/core/config/index.js';
|
||||
import { rateLimiter } from '@social/core/server/middleware.js';
|
||||
import { classifyError, sanitizeErrorMessage } from '@social/core/utils/errors.js';
|
||||
import { computeIdempotencyHash, getIdempotencyStore } from '@social/core/utils/idempotency.js';
|
||||
import { decodeKeysetCursor, paginateByKeyset } from './cursor.js';
|
||||
import { postComment, replyComment } from './comment.js';
|
||||
import { getFeedDetail, getSubComments } from './feed-detail.js';
|
||||
import { listFeeds } from './feeds.js';
|
||||
import { setFavoriteState, setLikeState } from './interaction.js';
|
||||
import { checkLoginStatus, deleteCookies, getLoginQRCode } from './login.js';
|
||||
import { listMyPosts } from './my-posts.js';
|
||||
import {
|
||||
GetFeedDetailSchema,
|
||||
GetSubCommentsSchema,
|
||||
GetUserProfileSchema,
|
||||
ListMyPostsSchema,
|
||||
PostCommentSchema,
|
||||
ReplyCommentSchema,
|
||||
SearchSchema,
|
||||
SetFavoriteStateSchema,
|
||||
SetLikeStateSchema,
|
||||
} from './schemas.js';
|
||||
import { searchFeeds } from './search.js';
|
||||
import { resolveFeedTarget, resolveUserTarget } from './target-resolver.js';
|
||||
import { getUserProfile } from './user-profile.js';
|
||||
|
||||
/** Platform key passed to `browser.withPage`. */
const PLATFORM = 'xiaoheihe';

/** Page size applied when a request does not supply `max_count`. */
const DEFAULT_PAGE_SIZE = 20;

/** Largest page size a request may ask for. */
const MAX_PAGE_SIZE = 200;

/** One-minute rate-limit window shared by both limiters, in milliseconds. */
const RATE_WINDOW_MS = 60_000;

/** Limiter attached to read routes: 60 requests per window. */
const readRateLimiter = rateLimiter({ windowMs: RATE_WINDOW_MS, maxRequests: 60 });

/** Limiter attached to mutating routes: 10 requests per window. */
const writeRateLimiter = rateLimiter({ windowMs: RATE_WINDOW_MS, maxRequests: 10 });
|
||||
|
||||
// Object schemas assembled from the per-field validators exported by
// `./schemas.js`, so each field is validated by the same rule the
// corresponding tool schema declares.
// NOTE(review): presumably used to validate REST request bodies; confirm in the route handlers.

/** Search: keyword plus optional paging controls. */
const SearchBodySchema = z.object({
  keyword: SearchSchema.keyword,
  max_count: SearchSchema.max_count,
  cursor: SearchSchema.cursor,
});

/** Feed detail: target given by link id and/or URL. */
const FeedDetailBodySchema = z.object({
  link_id: GetFeedDetailSchema.link_id,
  url: GetFeedDetailSchema.url,
});

/** Sub-comments: parent post and comment plus optional paging controls. */
const SubCommentsBodySchema = z.object({
  link_id: GetSubCommentsSchema.link_id,
  comment_id: GetSubCommentsSchema.comment_id,
  max_count: GetSubCommentsSchema.max_count,
  cursor: GetSubCommentsSchema.cursor,
});

/** User profile: target given by user id and/or URL. */
const UserProfileBodySchema = z.object({
  user_id: GetUserProfileSchema.user_id,
  url: GetUserProfileSchema.url,
});

/** Post comment: idempotency request id, target post and comment text. */
const PostCommentBodySchema = z.object({
  request_id: PostCommentSchema.request_id,
  link_id: PostCommentSchema.link_id,
  content: PostCommentSchema.content,
});

/** Reply to comment: idempotency request id, target post/comment and reply text. */
const ReplyCommentBodySchema = z.object({
  request_id: ReplyCommentSchema.request_id,
  link_id: ReplyCommentSchema.link_id,
  comment_id: ReplyCommentSchema.comment_id,
  content: ReplyCommentSchema.content,
});

/** Like toggle: target post and desired state. */
const LikeBodySchema = z.object({
  link_id: SetLikeStateSchema.link_id,
  liked: SetLikeStateSchema.liked,
});

/** Favorite toggle: target post and desired state. */
const FavoriteBodySchema = z.object({
  link_id: SetFavoriteStateSchema.link_id,
  favorited: SetFavoriteStateSchema.favorited,
});
|
||||
|
||||
/** JSON body of a successful REST response. */
interface ApiSuccessResponse<T> {
  /** Discriminator: always `true` for success bodies. */
  success: true;
  /** Endpoint-specific payload. */
  data: T;
}

/** JSON body of a failed REST response. */
interface ApiErrorResponse {
  /** Discriminator: always `false` for error bodies. */
  success: false;
  /** Error code and message describing the failure. */
  error: { code: string; message: string };
}
|
||||
|
||||
/**
 * Wraps a payload in the success response body.
 *
 * @param data Payload to return.
 * @returns `{ success: true, data }`.
 */
function successResponse<T>(data: T): ApiSuccessResponse<T> {
  const body: ApiSuccessResponse<T> = { success: true, data };
  return body;
}
|
||||
|
||||
/**
 * Builds the error response body.
 *
 * @param code Error code.
 * @param message Error description.
 * @returns `{ success: false, error: { code, message } }`.
 */
function errorResponse(code: string, message: string): ApiErrorResponse {
  const error = { code, message };
  return { success: false, error };
}
|
||||
|
||||
/**
 * Resolves the effective page size for a paginated request.
 *
 * @param maxCount Requested size; `DEFAULT_PAGE_SIZE` is used when omitted.
 * @returns The requested size limited to the range `[1, MAX_PAGE_SIZE]`.
 */
function clampPageSize(maxCount?: number): number {
  const requested = maxCount ?? DEFAULT_PAGE_SIZE;
  const atLeastOne = Math.max(1, requested);
  return Math.min(MAX_PAGE_SIZE, atLeastOne);
}
|
||||
|
||||
/**
 * Runs `execute`, optionally de-duplicated by a caller-supplied request id.
 *
 * - Without a `requestId` the operation simply runs and only `data` is returned.
 * - With a `requestId` already recorded for `toolName` and the same input hash,
 *   the stored response is returned and `execute` is not called.
 * - With a `requestId` recorded under a different input hash, an error is thrown.
 * - Otherwise the operation runs and its result is stored under the request id.
 *
 * NOTE(review): the store is only written after `execute` resolves, so two
 * overlapping calls with the same request id appear able to both execute;
 * confirm whether clients can issue concurrent retries.
 *
 * @param toolName Namespace for the request id.
 * @param requestId Caller-chosen idempotency key; falsy disables the mechanism.
 * @param inputForHash Value hashed to detect request-id reuse with other parameters.
 * @param execute The side-effecting operation.
 * @returns The result plus idempotency metadata when a request id was given.
 * @throws Error when `requestId` was previously used with different parameters.
 */
async function runWithIdempotency<T>(
  toolName: string,
  requestId: string | undefined,
  inputForHash: unknown,
  execute: () => Promise<T>,
): Promise<{ data: T; meta?: Record<string, unknown> }> {
  if (requestId) {
    const store = getIdempotencyStore();
    const inputHash = computeIdempotencyHash(inputForHash);
    const previous = store.get(toolName, requestId);

    if (!previous) {
      // First time this request id is seen: execute, then record the outcome.
      const fresh = await execute();
      store.put(toolName, requestId, inputHash, fresh);
      return {
        data: fresh,
        meta: { request_id: requestId, idempotent_replay: false },
      };
    }

    if (previous.inputHash !== inputHash) {
      throw new Error('request_id already used with different parameters');
    }

    return {
      data: previous.responseData as T,
      meta: {
        request_id: requestId,
        idempotent_replay: true,
        first_processed_at: previous.createdAt,
      },
    };
  }

  return { data: await execute() };
}
|
||||
|
||||
/**
 * Builds the router for the xhh HTTP endpoints.
 *
 * Every handler validates its input with zod, performs the browser work through
 * `browser.withPage` (or a login helper that takes the browser manager), and
 * replies with the success envelope. Any thrown error is passed to `handleError`.
 *
 * @param browser Browser manager used to run page operations.
 * @returns The configured router.
 */
export function createXhhRoutes(browser: BrowserManager): Router {
  const router = Router();

  /** Operation timeout: specific entry, then the `default` entry, then 60 seconds. */
  const timeoutFor = (operation: keyof typeof config.operationTimeouts) =>
    config.operationTimeouts[operation] ?? config.operationTimeouts['default'] ?? 60_000;

  /** Starts `task` without awaiting it and forwards any failure to `handleError`. */
  const respond = (res: Parameters<typeof handleError>[0], task: () => Promise<void>): void => {
    void task().catch((err: unknown) => handleError(res, err));
  };

  /** Assembles the `{ items, pagination }` payload shared by the keyset-paginated routes. */
  const keysetPayload = (
    paged: { items: readonly unknown[]; nextCursor?: string | null | undefined },
    cursor: string | undefined,
    limit: number,
  ) => ({
    items: paged.items,
    pagination: {
      mode: 'keyset',
      cursor: cursor ?? '',
      max_count: limit,
      returned: paged.items.length,
      ...(paged.nextCursor ? { next_cursor: paged.nextCursor } : {}),
    },
  });

  // Query-string validators (values arrive as strings, hence z.coerce).
  const feedsQuerySchema = z.object({
    max_count: z.coerce.number().int().min(1).max(200).optional().default(20),
    cursor: z.string().optional(),
  });
  const myPostsQuerySchema = z.object({
    type: ListMyPostsSchema.type,
    max_count: z.coerce.number().int().min(1).max(200).optional().default(20),
    cursor: z.string().optional(),
  });

  // ---- login -------------------------------------------------------------

  router.get('/login/status', readRateLimiter, (_req, res) => {
    respond(res, async () => {
      const timeoutMs = timeoutFor('login');
      const status = await browser.withPage(PLATFORM, async (page) => checkLoginStatus(page), timeoutMs);

      // Optional fields are only included when they carry a non-empty value.
      const payload: Record<string, unknown> = { logged_in: status.loggedIn };
      if (status.username) payload['username'] = status.username;
      if (status.avatar) payload['avatar'] = status.avatar;
      if (status.userId) payload['user_id'] = status.userId;
      res.json(successResponse(payload));
    });
  });

  router.get('/login/qrcode', readRateLimiter, (_req, res) => {
    respond(res, async () => {
      const { qrcodeData, alreadyLoggedIn, timeout } = await getLoginQRCode(browser);
      res.json(successResponse({
        qrcode_data: qrcodeData,
        already_logged_in: alreadyLoggedIn,
        timeout,
      }));
    });
  });

  router.delete('/login/cookies', writeRateLimiter, (_req, res) => {
    respond(res, async () => {
      await deleteCookies(browser);
      res.json(successResponse({ deleted: true }));
    });
  });

  // ---- read endpoints ----------------------------------------------------

  router.get('/feeds', readRateLimiter, (req, res) => {
    respond(res, async () => {
      const query = feedsQuerySchema.parse(req.query);
      const timeoutMs = timeoutFor('feed_list');
      const feeds = await browser.withPage(PLATFORM, async (page) => listFeeds(page), timeoutMs);
      const limit = clampPageSize(query.max_count);
      const paged = paginateByKeyset(feeds, limit, decodeKeysetCursor(query.cursor), (item) => item.id);
      res.json(successResponse(keysetPayload(paged, query.cursor, limit)));
    });
  });

  router.post('/search', readRateLimiter, (req, res) => {
    respond(res, async () => {
      const body = SearchBodySchema.parse(req.body);
      const timeoutMs = timeoutFor('search');
      const items = await browser.withPage(PLATFORM, async (page) => searchFeeds(page, body.keyword), timeoutMs);
      const limit = clampPageSize(body.max_count);
      const paged = paginateByKeyset(items, limit, decodeKeysetCursor(body.cursor), (item) => item.id);
      res.json(successResponse(keysetPayload(paged, body.cursor, limit)));
    });
  });

  router.post('/feeds/detail', readRateLimiter, (req, res) => {
    respond(res, async () => {
      const body = FeedDetailBodySchema.parse(req.body);
      const target = resolveFeedTarget(body);
      const timeoutMs = timeoutFor('feed_detail');
      const detail = await browser.withPage(PLATFORM, async (page) => getFeedDetail(page, target.linkId), timeoutMs);

      // Comments are returned next to the detail object rather than inside it.
      const { comments, ...rest } = detail;
      res.json(successResponse({ detail: rest, first_screen_comments: comments }));
    });
  });

  router.post('/feeds/sub-comments', readRateLimiter, (req, res) => {
    respond(res, async () => {
      const body = SubCommentsBodySchema.parse(req.body);
      const timeoutMs = timeoutFor('feed_detail');
      const loaded = await browser.withPage(
        PLATFORM,
        async (page) => getSubComments(page, body.link_id, body.comment_id, MAX_PAGE_SIZE),
        timeoutMs,
      );

      // Order by creation time, breaking ties on id, so the keyset key is stable.
      const sorted = [...loaded].sort(
        (a, b) => a.createTime.localeCompare(b.createTime) || a.id.localeCompare(b.id),
      );
      const limit = clampPageSize(body.max_count);
      const paged = paginateByKeyset(
        sorted,
        limit,
        decodeKeysetCursor(body.cursor),
        (item) => `${item.createTime}|${item.id}`,
      );
      res.json(successResponse(keysetPayload(paged, body.cursor, limit)));
    });
  });

  router.post('/user/profile', readRateLimiter, (req, res) => {
    respond(res, async () => {
      const body = UserProfileBodySchema.parse(req.body);
      const target = resolveUserTarget(body);
      const timeoutMs = timeoutFor('user_profile');
      const profile = await browser.withPage(PLATFORM, async (page) => getUserProfile(page, target.userId), timeoutMs);

      const { id, nickname, avatar, description, follows, fans, likes, posts } = profile;
      res.json(successResponse({
        profile: { id, nickname, avatar, description, follows, fans, likes },
        recent_posts: posts,
      }));
    });
  });

  router.get('/my-posts', readRateLimiter, (req, res) => {
    respond(res, async () => {
      const query = myPostsQuerySchema.parse(req.query);
      const timeoutMs = timeoutFor('feed_list');
      const posts = await browser.withPage(PLATFORM, async (page) => listMyPosts(page, query.type), timeoutMs);
      const limit = clampPageSize(query.max_count);
      const paged = paginateByKeyset(
        posts,
        limit,
        decodeKeysetCursor(query.cursor),
        (item) => `${item.modifyTime ?? item.createTime ?? ''}|${item.id}`,
      );
      res.json(successResponse(keysetPayload(paged, query.cursor, limit)));
    });
  });

  // ---- write endpoints ---------------------------------------------------

  router.post('/comment', writeRateLimiter, (req, res) => {
    respond(res, async () => {
      const body = PostCommentBodySchema.parse(req.body);
      const timeoutMs = timeoutFor('comment');
      const hashedInput = { link_id: body.link_id, content: body.content };
      const result = await runWithIdempotency(
        'xhh_post_comment',
        body.request_id,
        hashedInput,
        async () => browser.withPage(
          PLATFORM,
          async (page) => postComment(page, body.link_id, body.content),
          timeoutMs,
        ),
      );
      res.json(successResponse({
        ...result.data,
        ...(result.meta ? { meta: result.meta } : {}),
      }));
    });
  });

  router.post('/comment/reply', writeRateLimiter, (req, res) => {
    respond(res, async () => {
      const body = ReplyCommentBodySchema.parse(req.body);
      const timeoutMs = timeoutFor('reply');
      const hashedInput = {
        link_id: body.link_id,
        comment_id: body.comment_id,
        content: body.content,
      };
      const result = await runWithIdempotency(
        'xhh_reply_comment',
        body.request_id,
        hashedInput,
        async () => browser.withPage(
          PLATFORM,
          async (page) => replyComment(page, body.link_id, body.comment_id, body.content),
          timeoutMs,
        ),
      );
      res.json(successResponse({
        ...result.data,
        ...(result.meta ? { meta: result.meta } : {}),
      }));
    });
  });

  router.post('/like/set-state', writeRateLimiter, (req, res) => {
    respond(res, async () => {
      const body = LikeBodySchema.parse(req.body);
      const timeoutMs = timeoutFor('like');
      const result = await browser.withPage(
        PLATFORM,
        async (page) => setLikeState(page, body.link_id, body.liked),
        timeoutMs,
      );
      res.json(successResponse(result));
    });
  });

  router.post('/favorite/set-state', writeRateLimiter, (req, res) => {
    respond(res, async () => {
      const body = FavoriteBodySchema.parse(req.body);
      const timeoutMs = timeoutFor('favorite');
      const result = await browser.withPage(
        PLATFORM,
        async (page) => setFavoriteState(page, body.link_id, body.favorited),
        timeoutMs,
      );
      res.json(successResponse(result));
    });
  });

  return router;
}
|
||||
|
||||
/**
 * Translates a thrown value into an error envelope and sends it.
 *
 * - `ZodError` → HTTP 400 with code `VALIDATION_ERROR`; the message lists each
 *   issue as `path: message`, joined by `; `.
 * - anything else → categorized by `classifyError`; category `AUTH_REQUIRED`
 *   yields HTTP 401, every other category HTTP 500. The message is passed
 *   through `sanitizeErrorMessage` first.
 *
 * @param res Response-like object exposing `status(code).json(body)`.
 * @param err The thrown value; non-`Error` values are wrapped via `String(err)`.
 */
function handleError(res: { status: (n: number) => { json: (body: ApiErrorResponse) => void } }, err: unknown): void {
  if (!(err instanceof ZodError)) {
    const failure = err instanceof Error ? err : new Error(String(err));
    const category = classifyError(failure);
    const message = sanitizeErrorMessage(failure.message);
    const statusCode = category === 'AUTH_REQUIRED' ? 401 : 500;
    res.status(statusCode).json(errorResponse(category, message));
    return;
  }

  const describeIssue = (issue: (typeof err.issues)[number]): string =>
    `${issue.path.join('.') || '<root>'}: ${issue.message}`;
  const detail = err.issues.map(describeIssue).join('; ');
  res.status(400).json(errorResponse('VALIDATION_ERROR', detail));
}
|
||||
@@ -0,0 +1,82 @@
|
||||
import { z } from 'zod';
|
||||
|
||||
/** Login-status check: takes no input fields. */
export const CheckLoginSchema = {};

/** Login QR-code request: takes no input fields. */
export const GetLoginQRCodeSchema = {};

/** Cookie deletion: takes no input fields. */
export const DeleteCookiesSchema = {};
|
||||
|
||||
/**
 * Input fields for listing feeds. The `max_count` and `cursor` validators are
 * reused by the other paginated schemas in this module.
 */
export const ListFeedsSchema = {
  // Integer page size in [1, 200]; defaults to 20 when omitted.
  max_count: z.number().int().min(1).max(200).optional().default(20)
    .describe('Maximum number of feeds to return per page (1-200, default 20)'),
  // Cursor value echoed back from a previous page; absent on the first call.
  cursor: z.string().optional()
    .describe('Keyset pagination cursor returned by previous call'),
};
|
||||
|
||||
/** Input fields for keyword search; pagination fields are shared with `ListFeedsSchema`. */
export const SearchSchema = {
  // Non-empty search term.
  keyword: z.string().min(1).describe('Search keyword'),
  max_count: ListFeedsSchema.max_count,
  cursor: ListFeedsSchema.cursor,
};
|
||||
|
||||
/**
 * Input fields for fetching a feed detail. Both fields are optional at the
 * schema level; the descriptions state that `link_id` is required when `url`
 * is absent.
 */
export const GetFeedDetailSchema = {
  link_id: z.string().optional()
    .describe('Link ID (required when url is absent)'),
  url: z.string().optional()
    .describe('Detail page URL (auto-parse link_id)'),
};
|
||||
|
||||
/** Input fields for listing replies under a comment; pagination fields are shared with `ListFeedsSchema`. */
export const GetSubCommentsSchema = {
  link_id: z.string().describe('Link ID'),
  comment_id: z.string().describe('Parent comment ID'),
  max_count: ListFeedsSchema.max_count,
  cursor: ListFeedsSchema.cursor,
};
|
||||
|
||||
/**
 * Input fields for fetching a user profile. Both fields are optional at the
 * schema level; the descriptions state that `user_id` is required when `url`
 * is absent.
 */
export const GetUserProfileSchema = {
  user_id: z.string().optional()
    .describe('User ID (required when url is absent)'),
  url: z.string().optional()
    .describe('User profile URL (auto-parse user_id)'),
};
|
||||
|
||||
/** Input fields for listing the current account's posts; pagination fields are shared with `ListFeedsSchema`. */
export const ListMyPostsSchema = {
  // One of the four known post types; defaults to 'all' when omitted.
  type: z.enum(['all', 'article', 'image_text', 'video']).optional().default('all')
    .describe('Post type filter'),
  max_count: ListFeedsSchema.max_count,
  cursor: ListFeedsSchema.cursor,
};
|
||||
|
||||
/** Input fields for posting a top-level comment. */
export const PostCommentSchema = {
  // Optional key of 1-128 characters.
  request_id: z.string().min(1).max(128).optional()
    .describe('Optional idempotency key'),
  link_id: z.string().describe('Link ID'),
  // Must not be empty.
  content: z.string().min(1).describe('Comment content'),
};
|
||||
|
||||
/** Input fields for replying to an existing comment; `request_id` reuses the `PostCommentSchema` rule. */
export const ReplyCommentSchema = {
  request_id: PostCommentSchema.request_id,
  link_id: z.string().describe('Link ID'),
  comment_id: z.string().describe('Target comment ID'),
  // Must not be empty.
  content: z.string().min(1).describe('Reply content'),
};
|
||||
|
||||
/** Input fields for setting the like state of a link. */
export const SetLikeStateSchema = {
  link_id: z.string().describe('Link ID'),
  // The desired state, not a toggle.
  liked: z.boolean().describe('Target like state'),
};
|
||||
|
||||
/** Input fields for setting the favorite state of a link. */
export const SetFavoriteStateSchema = {
  link_id: z.string().describe('Link ID'),
  // The desired state, not a toggle.
  favorited: z.boolean().describe('Target favorite state'),
};
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import type { Feed } from './types.js';
|
||||
import { searchFeeds as runSearch } from './feeds.js';
|
||||
|
||||
/**
 * Searches feeds by keyword. Thin pass-through to the `searchFeeds`
 * implementation exported by `./feeds.js`.
 *
 * @param page Browser page to run the search on.
 * @param keyword Search term.
 * @returns The feeds returned by the underlying implementation.
 */
export async function searchFeeds(page: Page, keyword: string): Promise<Feed[]> {
  const results = await runSearch(page, keyword);
  return results;
}
|
||||
|
||||
@@ -0,0 +1,147 @@
|
||||
/**
 * Selector table for the xhh pages, grouped by page area.
 *
 * Array-valued entries list alternative selectors; plain strings are single
 * selectors or comma-separated selector lists. Some entries use the
 * `:has-text(...)` pseudo-class, which is not standard CSS.
 */
export const XHH_SELECTORS = {
  // ---- login state and QR-code login ----
  login: {
    loggedInIndicators: ['.user-profile-user-head', '.user-info .user-name', '.view-header__user-box'],
    loginButton: '.user-box__login, .login-btn, button:has-text("登录")',
    qrCodeImage: [
      '#login-qrcode img',
      '#login-qrcode canvas',
      '.qr-code-wrapper img',
      '.qrcode-box img',
      'img[src*="qrcode"]',
    ],
    username: '.user-profile-user-head .name, .user-info .user-name',
    avatar: '.user-profile-user-head img, .user-info img.user-image',
    userLink: 'a[href*="/app/user/profile/"]',
  },

  // ---- feed cards in list views ----
  feed: {
    card: [
      '.content-management-home__content',
      '.hb-cpt__moment-list-content',
      '.related-recommend__link-item--content',
      '.bbs-home__content-list > *',
    ],
    link: 'a[href*="/app/bbs/link/"]',
    title: ['.link-item__title', '.content-list__title', '.article-title .title', '.title'],
    description: ['.link-item__desc', '.content-list__desc', '.article-desc', '.desc'],
    cover: 'img',
    userLink: 'a[href*="/app/user/profile/"]',
    userName: ['.list-content__username', '.user-name', '.name'],
    likeCount: ['.content-list__like-cnt', '.like-count', '.link-award-num'],
    commentCount: ['.content-list__comment-cnt', '.comment-count', '.comment-num'],
  },

  // ---- link detail page: content, comments, engagement controls ----
  detail: {
    title: ['.link-detail__title', '.bbs-link__title', '.article-title .title', 'h1'],
    description: ['.link-detail__content', '.bbs-link__content', '.article-content', '.description'],
    image: '.article-content img, .bbs-link img, .link-detail img',
    userLink: 'a[href*="/app/user/profile/"]',
    userName: ['.user-profile-user-head .name', '.user-info .user-name', '.header .name'],
    userAvatar: '.user-profile-user-head img, .user-info img, .header img',
    commentItem: [
      '.comment-item',
      '.bbs-comment-item',
      '.link-comment-item',
      '.comment__item',
      '[id*="comment"]',
    ],
    subCommentItem: ['.sub-comment-item', '.reply-item', '.sub-comment', '.child-comment'],
    commentAuthor: '.name, .nickname, a[href*="/app/user/profile/"]',
    commentAvatar: 'img',
    commentContent: '.content, .comment-content, p',
    commentTime: '.time, .date, .create-time',
    commentLikeCount: '.like-count, .like .count',
    commentReplyButton: 'button:has-text("回复"), .reply-btn, .comment-reply',
    commentExpandReplies: 'button:has-text("展开"), .show-more, .expand-replies',
    likeButton: [
      '.engage-bar-style .like-wrapper',
      '.like-wrapper',
      'button:has(.heybox-bbs_thumbs-up_line_24x24)',
      'button:has(.heybox-bbs_thumbs-up_filled_24x24)',
    ],
    favoriteButton: [
      '.engage-bar-style .collect-wrapper',
      '.collect-wrapper',
      'button:has(.heybox-bbs_collect_line_24x24)',
      'button:has(.heybox-bbs_collect_filled_24x24)',
    ],
    commentCount: '.content-list__comment-cnt, .comment-count, .comment-num',
    likeCount: '.content-list__like-cnt, .like-count, .link-award-num',
    favoriteCount: '.favorite-count, .collect-count, .favour-count',
    commentInput: [
      'textarea[placeholder*="评论"]',
      'textarea',
      '[contenteditable="true"][placeholder*="评论"]',
      '[contenteditable="true"]',
    ],
    commentSubmit: [
      'button:has-text("发送")',
      'button:has-text("发布")',
      'button:has-text("评论")',
      '.comment-submit',
      '.submit',
    ],
  },

  // ---- user profile page ----
  profile: {
    nickname: '.user-profile-user-head .name, .user-info .user-name',
    avatar: '.user-profile-user-head img, .user-info img.user-image',
    description: '.user-profile-user-head .desc, .user-info .user-desc, .signature',
    followCount: '.bbs-info-item .value, .follow-num',
    postLink: 'a[href*="/app/bbs/link/"]',
  },

  // ---- "my posts" content-management page ----
  myPosts: {
    tabButton: '.creator-content-management__tabs button',
    postItem: '.content-management-home__content',
    postLink: 'a[href*="/app/bbs/link/"]',
    title: '.link-item__title, .title',
    description: '.link-item__desc, .desc',
    time: '.time, .date',
  },
} as const;
|
||||
@@ -0,0 +1,52 @@
|
||||
import { extractLinkIdFromUrl, extractUserIdFromUrl } from './extractors.js';
|
||||
|
||||
/** Caller-supplied reference to a feed: an explicit id, a URL, or both. */
interface FeedTargetInput {
  /** Explicit link id. */
  link_id?: string;
  /** URL from which a link id may be extracted. */
  url?: string;
}
|
||||
|
||||
/** Caller-supplied reference to a user: an explicit id, a URL, or both. */
interface UserTargetInput {
  /** Explicit user id. */
  user_id?: string;
  /** URL from which a user id may be extracted. */
  url?: string;
}
|
||||
|
||||
/** Outcome of resolving a feed reference. */
export interface FeedTargetResolved {
  /** The resolved, non-empty link id. */
  linkId: string;
}
|
||||
|
||||
/** Outcome of resolving a user reference. */
export interface UserTargetResolved {
  /** The resolved, non-empty user id. */
  userId: string;
}
|
||||
|
||||
/**
 * Strips surrounding whitespace from a URL string.
 *
 * @param url Raw URL text.
 * @returns The trimmed text.
 * @throws Error when nothing remains after trimming.
 */
function normalizeUrl(url: string): string {
  const cleaned = url.trim();
  if (cleaned.length === 0) throw new Error('url cannot be empty');
  return cleaned;
}
|
||||
|
||||
/**
 * Determines the link id for a feed reference.
 *
 * A non-blank `link_id` wins. Otherwise, when `url` is present, the id is
 * extracted from the trimmed URL with `extractLinkIdFromUrl`.
 *
 * @param input Feed reference with `link_id` and/or `url`.
 * @returns The resolved link id.
 * @throws Error when `url` is whitespace-only, or when no id can be obtained.
 */
export function resolveFeedTarget(input: FeedTargetInput): FeedTargetResolved {
  const linkId =
    input.link_id?.trim() ||
    (input.url ? extractLinkIdFromUrl(normalizeUrl(input.url)) : undefined);

  if (!linkId) {
    throw new Error('xhh_get_feed_detail requires link_id or url containing link_id');
  }
  return { linkId };
}
|
||||
|
||||
/**
 * Determines the user id for a user reference.
 *
 * A non-blank `user_id` wins. Otherwise, when `url` is present, the id is
 * extracted from the trimmed URL with `extractUserIdFromUrl`.
 *
 * @param input User reference with `user_id` and/or `url`.
 * @returns The resolved user id.
 * @throws Error when `url` is whitespace-only, or when no id can be obtained.
 */
export function resolveUserTarget(input: UserTargetInput): UserTargetResolved {
  const userId =
    input.user_id?.trim() ||
    (input.url ? extractUserIdFromUrl(normalizeUrl(input.url)) : undefined);

  if (!userId) {
    throw new Error('xhh_get_user_profile requires user_id or url containing user_id');
  }
  return { userId };
}
|
||||
|
||||
@@ -0,0 +1,74 @@
|
||||
/** Login state of the browser session, with optional account details. */
export interface LoginStatus {
  /** Whether the session is logged in. */
  loggedIn: boolean;
  /** Account display name, when available. */
  username?: string;
  /** Account avatar, when available (presumably an image URL — confirm against the producer). */
  avatar?: string;
  /** Account id, when available. */
  userId?: string;
}
|
||||
|
||||
/** Result of requesting a login QR code. */
export interface QRCodeResult {
  /** QR-code payload (encoding not visible here — confirm against the producer). */
  qrcodeData: string;
  /** Whether the session was already logged in. */
  alreadyLoggedIn: boolean;
  /** Timeout value as a string (format not visible here — confirm against the producer). */
  timeout: string;
}
|
||||
|
||||
/** Author information attached to a feed item. */
export interface FeedUser {
  /** User id. */
  id: string;
  /** Display name. */
  nickname: string;
  /** Avatar (presumably an image URL — confirm against the extractor). */
  avatar: string;
}
|
||||
|
||||
/** One feed item as shown in list views. */
export interface Feed {
  /** Feed id; the list and search routes use it as the keyset pagination key. */
  id: string;
  title: string;
  description: string;
  /** Cover image reference. */
  coverUrl: string;
  likeCount: number;
  commentCount: number;
  /** Author of the item. */
  user: FeedUser;
  /** Link to the item's detail page. */
  linkUrl: string;
}
|
||||
|
||||
/** A comment, optionally carrying nested replies. */
export interface Comment {
  /** Comment id. */
  id: string;
  /** Id of the parent comment, when this is a reply. */
  parentId?: string;
  /** Author's user id. */
  userId: string;
  /** Author's display name. */
  nickname: string;
  /** Author's avatar. */
  avatar: string;
  /** Comment text. */
  content: string;
  likeCount: number;
  /** Creation time kept as a string (format not visible here — confirm against the extractor). */
  createTime: string;
  /** Replies nested under this comment. */
  subComments: Comment[];
}
|
||||
|
||||
/** Full detail of a single link, including its comments. */
export interface FeedDetail {
  /** Link id. */
  id: string;
  title: string;
  description: string;
  /** Image references found in the content. */
  images: string[];
  likeCount: number;
  favoriteCount: number;
  commentCount: number;
  /** Whether the link is currently liked (presumably by the logged-in account — confirm). */
  isLiked: boolean;
  /** Whether the link is currently favorited (presumably by the logged-in account — confirm). */
  isFavorited: boolean;
  /** Author of the link. */
  user: FeedUser;
  /** Comments collected with the detail. */
  comments: Comment[];
}
|
||||
|
||||
/** Public profile of a user together with a list of their posts. */
export interface UserProfile {
  /** User id. */
  id: string;
  /** Display name. */
  nickname: string;
  /** Avatar image source. */
  avatar: string;
  /** Profile description text. */
  description: string;
  /** Follow count. */
  follows: number;
  /** Fan count. */
  fans: number;
  /** Like count. */
  likes: number;
  /** Posts associated with the user. */
  posts: Feed[];
}
|
||||
|
||||
/** Post-type filter values: `'all'` or one of the three concrete post kinds. */
export type MyPostType =
  | 'all'
  | 'article'
  | 'image_text'
  | 'video';
|
||||
|
||||
/** A post owned by the current account: a `Feed` plus type and timestamps. */
export interface MyPost extends Feed {
  /** Post type. */
  type: MyPostType;
  /** Creation time as a string, when known. */
  createTime?: string;
  /** Last-modification time as a string, when known. */
  modifyTime?: string;
}
|
||||
@@ -0,0 +1,72 @@
|
||||
import type { Page } from 'rebrowser-playwright';
|
||||
|
||||
import { logger } from '@social/core/utils/logger.js';
|
||||
import { XHH_SELECTORS } from './selectors.js';
|
||||
import type { UserProfile } from './types.js';
|
||||
import { detectCaptchaText, parseCountString } from './extractors.js';
|
||||
import { listFeeds } from './feeds.js';
|
||||
|
||||
const log = logger.child({ module: 'xhh-user-profile' });
|
||||
|
||||
/**
 * Builds the profile page URL for a user.
 *
 * @param userId User id; percent-encoded before being placed in the path.
 * @returns Absolute profile URL on www.xiaoheihe.cn.
 */
function buildProfileUrl(userId: string): string {
  const encodedId = encodeURIComponent(userId);
  return 'https://www.xiaoheihe.cn/app/user/profile/' + encodedId;
}
|
||||
|
||||
/**
 * Opens a user's profile page and extracts the profile plus up to 20 posts.
 *
 * Steps: navigate to the profile URL, wait 1.2 s, abort if the page text looks
 * like a captcha, scrape nickname/avatar/description/counters/post links in
 * the page, then collect posts through `listFeeds`.
 *
 * @param page Browser page to drive; it is navigated to the profile URL.
 * @param userId Id of the user to look up.
 * @returns The extracted profile. `follows`, `fans` and `likes` come from the
 *   first three counter elements in document order.
 * @throws Error `CAPTCHA_REQUIRED: ...` when captcha text is detected.
 * @throws Error `waiting for selector: ...` when neither a nickname nor an
 *   avatar could be read from the page.
 */
export async function getUserProfile(page: Page, userId: string): Promise<UserProfile> {
  await page.goto(buildProfileUrl(userId), { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(1_200);

  // Best-effort read: a failure to read the body is treated as "no captcha text".
  const text = await page.textContent('body').catch(() => '');
  if (text && detectCaptchaText(text)) {
    throw new Error('CAPTCHA_REQUIRED: captcha detected on user profile page');
  }

  // Runs inside the page; only the serialized selector table is available there.
  const raw = await page.evaluate((selectors) => {
    const pickText = (selector: string) =>
      (document.querySelector(selector)?.textContent ?? '').trim();
    const pickAttr = (selector: string, attr: string) =>
      (document.querySelector(selector)?.getAttribute(attr) ?? '').trim();

    const counters = [...document.querySelectorAll(selectors.profile.followCount)]
      .map((node) => (node.textContent ?? '').trim())
      .filter(Boolean);

    const postLinks = [...document.querySelectorAll<HTMLAnchorElement>(selectors.profile.postLink)]
      .map((node) => node.getAttribute('href') ?? '')
      .filter(Boolean);

    return {
      nickname: pickText(selectors.profile.nickname),
      avatar: pickAttr(selectors.profile.avatar, 'src'),
      description: pickText(selectors.profile.description),
      counters,
      postLinks,
    };
  }, XHH_SELECTORS);

  // Fail fast: this verdict depends only on the scraped header, so there is no
  // point in listing feeds for a profile that was not found.
  if (!raw.nickname && !raw.avatar) {
    throw new Error('waiting for selector: xhh profile not found');
  }

  const [followRaw, fansRaw, likesRaw] = raw.counters;

  // Posts are best-effort, but a failure is logged instead of vanishing silently.
  const recentPosts = await listFeeds(page).catch((err: unknown): UserProfile['posts'] => {
    log.warn({ userId, err }, 'xhh user profile: listing recent posts failed, returning none');
    return [];
  });

  // Keep items authored by this user, or whose id appears in a post link on the profile page.
  const filteredPosts = recentPosts
    .filter((item) => item.user.id === userId || raw.postLinks.some((href: string) => href.includes(item.id)))
    .slice(0, 20);

  const profile: UserProfile = {
    id: userId,
    nickname: raw.nickname,
    avatar: raw.avatar,
    description: raw.description,
    follows: parseCountString(followRaw),
    fans: parseCountString(fansRaw),
    likes: parseCountString(likesRaw),
    posts: filteredPosts,
  };

  log.info({ userId, posts: profile.posts.length }, 'xhh user profile extracted');
  return profile;
}
|
||||
|
||||
Reference in New Issue
Block a user