|
|
|
@@ -1,9 +1,9 @@
|
|
|
|
|
import type { Page, ElementHandle } from 'rebrowser-playwright';
|
|
|
|
|
import type { Page } from 'rebrowser-playwright';
|
|
|
|
|
|
|
|
|
|
import { logger } from '../../utils/logger.js';
|
|
|
|
|
import { XHS_SELECTORS } from './selectors.js';
|
|
|
|
|
import { extractInitialState, parseCountString, ensureHttps } from './feeds.js';
|
|
|
|
|
import type { FeedDetail, Comment, CommentsResult } from './types.js';
|
|
|
|
|
import type { FeedDetail, Comment } from './types.js';
|
|
|
|
|
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
|
// Constants
|
|
|
|
@@ -155,12 +155,14 @@ interface RawCommentData {
|
|
|
|
|
* Navigate to a Xiaohongshu note detail page and extract comprehensive
|
|
|
|
|
* information including title, content, images/video, and stats.
|
|
|
|
|
*
|
|
|
|
|
* Comments are NOT loaded here — use {@link getFeedComments} instead.
|
|
|
|
|
* First-screen comments (10-20 top-level, each with 1-2 sub-comment
|
|
|
|
|
* previews) are included. Use {@link getSubComments} to load complete
|
|
|
|
|
* sub-comments for a specific parent comment.
|
|
|
|
|
*
|
|
|
|
|
* @param page - A Playwright Page managed by BrowserManager.
|
|
|
|
|
* @param feedId - The note (feed) ID.
|
|
|
|
|
* @param xsecToken - Security token required to access the note.
|
|
|
|
|
* @returns A FeedDetail object with full note data (comments always `[]`).
|
|
|
|
|
* @returns A FeedDetail object with full note data including first-screen comments.
|
|
|
|
|
*/
|
|
|
|
|
export async function getFeedDetail(
|
|
|
|
|
page: Page,
|
|
|
|
@@ -245,68 +247,115 @@ export async function getFeedDetail(
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
|
// getFeedComments
|
|
|
|
|
// getSubComments
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
/** Sort order type for comments. */
|
|
|
|
|
export type CommentSort = 'default' | 'newest' | 'most_liked';
|
|
|
|
|
|
|
|
|
|
/**
 * Navigate to a Xiaohongshu note detail page, locate one top-level comment,
 * and load its sub-comments (replies) by repeatedly clicking the
 * "展开更多回复" ("expand more replies") button until the store holds at
 * least `maxCount` replies or no further button is available.
 *
 * First-screen comments (with their 1-2 sub-comment previews) are already
 * returned by {@link getFeedDetail}; this function is for loading more
 * replies of a single parent comment.
 *
 * The replies themselves are read from the page's Vue store, not from the
 * DOM — the DOM is only used to find and click the "load more" button.
 *
 * @param page - A Playwright Page managed by BrowserManager.
 * @param feedId - The note (feed) ID.
 * @param xsecToken - Security token required to access the note.
 * @param commentId - The parent comment ID whose sub-comments to load.
 * @param maxCount - Stop loading once we have at least this many (default 20).
 * @returns An array of Comment objects (the sub-comments), at most `maxCount`.
 */
export async function getSubComments(
  page: Page,
  feedId: string,
  xsecToken: string,
  commentId: string,
  maxCount = 20,
): Promise<Comment[]> {
  const url = `${FEED_DETAIL_BASE_URL}/${feedId}?xsec_token=${encodeURIComponent(xsecToken)}&xsec_source=pc_feed`;
  log.debug({ feedId, commentId, url, maxCount }, 'Navigating to feed page for sub-comments');

  await page.goto(url, { waitUntil: 'domcontentloaded' });

  // Wait for the note container, then immediately poll the store —
  // no extra fixed delay needed, the store poll covers timing.
  await page.waitForSelector(SEL.noteContainer, { timeout: 15_000 }).catch(() => {
    log.warn({ feedId }, 'Note container not found within timeout, proceeding');
  });

  // Wait for comments store to finish initial load.
  await waitForCommentsStoreReady(page, feedId);

  // -----------------------------------------------------------------------
  // Locate the .parent-comment wrapper that owns `commentId`.
  //
  // The lookup and the handle creation happen in ONE in-page call, so the
  // returned element is exactly the one that matched. (Looking up an index
  // first and then re-querying `.parent-comment` can select a different
  // element if comments are inserted between the two calls.)
  // `asElement()` turns the JSHandle into an ElementHandle, or null when the
  // page function returned null.
  // -----------------------------------------------------------------------
  const parentHandle = await page.evaluateHandle((cid: string) => {
    const parents = document.querySelectorAll('.parent-comment');
    for (let i = 0; i < parents.length; i++) {
      const item = parents[i].querySelector('.comment-item');
      if (!item) continue;
      // DOM uses id="comment-{id}"; fall back to data attributes.
      const id =
        item.getAttribute('id')?.replace(/^comment-/, '') ??
        item.getAttribute('data-id') ??
        item.getAttribute('data-comment-id') ??
        '';
      if (id === cid) return parents[i];
    }
    return null;
  }, commentId);
  const parentEl = parentHandle.asElement();

  let clicks = 0;

  if (parentEl) {
    // Scroll the comment into view first.
    await parentEl.scrollIntoViewIfNeeded().catch(() => {});
    await page.waitForTimeout(300);

    while (clicks < MAX_LOAD_MORE_CLICKS) {
      // Check if we already have enough sub-comments in the store.
      const currentCount = await getStoreSubCommentCount(page, feedId, commentId);
      if (currentCount >= maxCount) break;

      // Look for the "load more replies" button inside this comment thread.
      // Re-queried every round because the list re-renders after each load.
      const loadMoreBtn = await parentEl.$('.show-more').catch(() => null);
      if (!loadMoreBtn) break;

      const isVisible = await loadMoreBtn.isVisible().catch(() => false);
      if (!isVisible) break;

      try {
        await loadMoreBtn.click();
      } catch (err: unknown) {
        // A failed click loads nothing; stop instead of retrying and
        // sleeping for up to MAX_LOAD_MORE_CLICKS rounds.
        log.warn({ feedId, commentId, err }, 'Clicking "load more replies" failed, stopping');
        break;
      }

      await page.waitForTimeout(LOAD_MORE_DELAY_MS);
      clicks++;
    }
  } else {
    log.warn({ feedId, commentId }, 'Target parent-comment not found in DOM');
  }

  if (clicks > 0) {
    log.debug({ commentId, clicks }, 'Clicked "load more replies" button');
  }

  // -----------------------------------------------------------------------
  // Read sub-comments from the Vue store for this specific comment.
  // -----------------------------------------------------------------------
  const subComments = await extractSubCommentsFromStore(page, feedId, commentId, maxCount);

  log.info(
    { feedId, commentId, subCommentCount: subComments.length, maxCount },
    'Sub-comments extraction complete',
  );

  return subComments;
}
|
|
|
|
|
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
@@ -670,218 +719,100 @@ async function extractCommentsFromStore(
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
|
// Comment scraping from DOM — uses Playwright Node-side API exclusively
|
|
|
|
|
// Sub-comment extraction from Vue store
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
/**
 * Wait for the comments store to finish its initial request for a given feed.
 *
 * Polls `window.__INITIAL_STATE__.note.noteDetailMap[feedId].comments`
 * inside the page until `firstRequestFinish` is truthy or the timeout
 * elapses. Timing out is not an error: the function simply resolves and the
 * caller proceeds with whatever the store contains.
 *
 * The timeout is measured against the wall clock (`Date.now()`), so
 * throttled timers cannot stretch the wait beyond roughly
 * `timeoutMs + pollMs`.
 *
 * @param page - The current Playwright page (already on the detail URL).
 * @param feedId - The note (feed) ID used as the `noteDetailMap` key.
 * @param timeoutMs - Maximum time to wait, in milliseconds (default 5000).
 * @param pollMs - Delay between store checks, in milliseconds (default 200).
 */
async function waitForCommentsStoreReady(
  page: Page,
  feedId: string,
  timeoutMs = 5000,
  pollMs = 200,
): Promise<void> {
  await page.evaluate(
    // Runs in the browser: must be self-contained (no outer-scope references).
    async (args: { id: string; timeoutMs: number; pollMs: number }) => {
      const deadline = Date.now() + args.timeoutMs;
      for (;;) {
        const state = (window as unknown as Record<string, unknown>).__INITIAL_STATE__ as
          Record<string, unknown> | undefined;
        const note = state?.note as Record<string, unknown> | undefined;
        const map = note?.noteDetailMap as Record<string, Record<string, unknown>> | undefined;
        const entry = map?.[args.id];
        const comments = entry?.comments as { firstRequestFinish?: boolean } | undefined;
        if (comments?.firstRequestFinish) return;
        if (Date.now() >= deadline) return;
        await new Promise<void>((resolve) => setTimeout(resolve, args.pollMs));
      }
    },
    { id: feedId, timeoutMs, pollMs },
  );
}
|
|
|
|
|
|
|
|
|
|
/**
 * Quick check: how many sub-comments does the store currently have for
 * a given parent comment? Used to decide whether to keep clicking.
 *
 * Reads `window.__INITIAL_STATE__.note.noteDetailMap[feedId].comments.list`
 * inside the page, finds the entry whose `id` equals `commentId`, and counts
 * its `subComments` (or `sub_comments`) array.
 *
 * @param page - The current Playwright page (already on the detail URL).
 * @param feedId - The note (feed) ID used as the `noteDetailMap` key.
 * @param commentId - The parent comment ID to look up in the list.
 * @returns The number of loaded sub-comments; 0 when the store, the feed
 *   entry, the parent comment, or a sub-comment array is missing.
 */
async function getStoreSubCommentCount(
  page: Page,
  feedId: string,
  commentId: string,
): Promise<number> {
  return page.evaluate(
    // Runs in the browser: must be self-contained (no outer-scope references).
    (args: { feedId: string; commentId: string }) => {
      const state = (window as unknown as Record<string, unknown>).__INITIAL_STATE__ as
        Record<string, unknown> | undefined;
      const note = state?.note as Record<string, unknown> | undefined;
      const map = note?.noteDetailMap as Record<string, Record<string, unknown>> | undefined;
      const entry = map?.[args.feedId];
      const comments = entry?.comments as { list?: Array<Record<string, unknown>> } | undefined;
      if (!comments?.list) return 0;
      const parent = comments.list.find((c) => c.id === args.commentId);
      if (!parent) return 0;
      const subs: unknown = parent.subComments ?? parent.sub_comments;
      // Guard against a non-array value so the result is always a number.
      return Array.isArray(subs) ? subs.length : 0;
    },
    { feedId, commentId },
  );
}
|
|
|
|
|
/**
 * Read sub-comments for a specific parent comment from the Vue store,
 * capped at `maxCount`.
 *
 * The store structure is:
 *   `__INITIAL_STATE__.note.noteDetailMap[feedId].comments.list[]`
 * Each item in `list` has `subComments[]`, `subCommentCount`,
 * `subCommentHasMore`, and `subCommentCursor`.
 *
 * @param page - The current Playwright page (already on the detail URL).
 * @param feedId - The note (feed) ID used as the `noteDetailMap` key.
 * @param commentId - The parent comment ID whose sub-comments to read.
 * @param maxCount - Upper bound on the number of sub-comments returned.
 *   Zero, negative and NaN values yield an empty array.
 * @returns The parsed sub-comments, in store order; entries that
 *   `parseRawComment` maps to `null` are dropped.
 */
async function extractSubCommentsFromStore(
  page: Page,
  feedId: string,
  commentId: string,
  maxCount: number,
): Promise<Comment[]> {
  // Clamp the cap: `slice(0, n)` with a negative n would drop items from the
  // END of the list instead of returning none. `Infinity` is preserved and
  // means "everything"; NaN becomes 0.
  const limit = Math.max(0, Math.trunc(maxCount) || 0);
  if (limit === 0) return [];

  const rawSubComments = (await page.evaluate(
    // Runs in the browser: must be self-contained (no outer-scope references).
    (args: { feedId: string; commentId: string; limit: number }) => {
      const state = (window as unknown as Record<string, unknown>).__INITIAL_STATE__ as
        Record<string, unknown> | undefined;
      const note = state?.note as Record<string, unknown> | undefined;
      const map = note?.noteDetailMap as Record<string, Record<string, unknown>> | undefined;
      const entry = map?.[args.feedId];
      const comments = entry?.comments as { list?: Array<Record<string, unknown>> } | undefined;
      if (!comments?.list) return [];

      const parent = comments.list.find((c) => c.id === args.commentId);
      if (!parent) return [];

      const subs: unknown = parent.subComments ?? parent.sub_comments;
      if (!Array.isArray(subs)) return [];

      // JSON round-trip yields plain data that can be returned from the page
      // (presumably the store objects are reactive proxies — TODO confirm).
      return JSON.parse(JSON.stringify(subs.slice(0, args.limit)));
    },
    { feedId, commentId, limit },
  )) as RawCommentData[];

  return rawSubComments
    // Explicit lambda so map's index/array arguments are not forwarded.
    .map((raw) => parseRawComment(raw))
    .filter((c): c is Comment => c !== null);
}
|
|
|
|
|
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
|