Files
social-mcp/apps/xhh-mcp/src/platforms/xiaoheihe/extractors.ts
T

111 lines
2.9 KiB
TypeScript

import type { Feed } from './types.js';
/**
 * Multipliers for the magnitude suffixes accepted by {@link parseCountString},
 * keyed by the lowercase form of the suffix character.
 */
const COUNT_SUFFIX_MULTIPLIERS: ReadonlyMap<string, number> = new Map([
  ['k', 1_000],
  ['w', 10_000],
  ['万', 10_000],
  ['亿', 100_000_000],
]);

/**
 * Converts a display count such as `"1,234"`, `"1.2万"`, `"3.4亿"`, `"1.5w"`,
 * `"2k"` or `"10万+"` into a plain number.
 *
 * @param raw - Count as shown in the UI, or an already-numeric value.
 * @returns The parsed count. Finite numbers are returned unchanged; `null`,
 *   `undefined`, blank, non-finite and unparseable inputs all yield `0`.
 */
export function parseCountString(raw: string | number | null | undefined): number {
  if (typeof raw === 'number') {
    return Number.isFinite(raw) ? raw : 0;
  }

  const text = (raw ?? '')
    .toString()
    .trim()
    // Drop thousands separators (ASCII and full-width commas).
    .replace(/[,，]/g, '')
    // Drop a trailing "at least" marker so "10万+" still reaches the suffix check.
    .replace(/[+＋]+$/, '');
  if (!text) return 0;

  const multiplier = COUNT_SUFFIX_MULTIPLIERS.get(text.slice(-1).toLowerCase());
  if (multiplier !== undefined) {
    const scaled = Number.parseFloat(text.slice(0, -1));
    // Round because a fractional mantissa times the multiplier can land just off an integer.
    return Number.isFinite(scaled) ? Math.round(scaled * multiplier) : 0;
  }

  // No suffix: keep the leading integer part, ignoring any trailing characters.
  const intNum = Number.parseInt(text, 10);
  return Number.isNaN(intNum) ? 0 : intNum;
}
/**
 * Reports whether the given text contains any known captcha marker.
 *
 * The input is lowercased before comparison, so the check is
 * case-insensitive for the ASCII markers.
 *
 * @param text - Text to scan.
 * @returns `true` when at least one marker occurs in `text`.
 */
export function detectCaptchaText(text: string): boolean {
  const lowered = text.toLowerCase();
  const markers = ['captcha', 'show_captcha', '验证码', 'tencentcaptcha'];
  return markers.some((marker) => lowered.includes(marker));
}
/**
 * Extracts a post ("link") id from a URL or path.
 *
 * Accepted inputs are absolute `http(s)` URLs, paths starting with `/`
 * (resolved against `https://www.xiaoheihe.cn`), and scheme-less URLs
 * (prefixed with `https://`). A `/app/bbs/link/<digits>` path segment takes
 * priority; otherwise the first non-empty of the `link_id` and `linkid`
 * query parameters is used.
 *
 * @param rawUrl - URL or path to inspect; surrounding whitespace is ignored.
 * @returns The id as a string, or `undefined` when the input is blank,
 *   cannot be parsed as a URL, or carries no id.
 */
export function extractLinkIdFromUrl(rawUrl: string): string | undefined {
  const trimmed = rawUrl.trim();
  if (!trimmed) return undefined;

  let absolute: string;
  if (/^https?:\/\//i.test(trimmed)) {
    absolute = trimmed;
  } else if (trimmed.startsWith('/')) {
    absolute = `https://www.xiaoheihe.cn${trimmed}`;
  } else {
    absolute = `https://${trimmed}`;
  }

  let url: URL;
  try {
    url = new URL(absolute);
  } catch {
    // Unparseable input is reported as "no id" rather than thrown.
    return undefined;
  }

  const pathMatch = url.pathname.match(/\/app\/bbs\/link\/(\d+)/);
  if (pathMatch?.[1]) return pathMatch[1];

  // `||` rather than `??`: an empty `link_id=` must not mask a populated `linkid`.
  return url.searchParams.get('link_id') || url.searchParams.get('linkid') || undefined;
}
/**
 * Extracts a user id from a URL or path.
 *
 * Accepted inputs are absolute `http(s)` URLs, paths starting with `/`
 * (resolved against `https://www.xiaoheihe.cn`), and scheme-less URLs
 * (prefixed with `https://`). A `/app/user/profile/<digits>` path segment
 * takes priority; otherwise the first non-empty of the `userid` and
 * `user_id` query parameters is used.
 *
 * @param rawUrl - URL or path to inspect; surrounding whitespace is ignored.
 * @returns The id as a string, or `undefined` when the input is blank,
 *   cannot be parsed as a URL, or carries no id.
 */
export function extractUserIdFromUrl(rawUrl: string): string | undefined {
  const trimmed = rawUrl.trim();
  if (!trimmed) return undefined;

  let absolute: string;
  if (/^https?:\/\//i.test(trimmed)) {
    absolute = trimmed;
  } else if (trimmed.startsWith('/')) {
    absolute = `https://www.xiaoheihe.cn${trimmed}`;
  } else {
    absolute = `https://${trimmed}`;
  }

  let url: URL;
  try {
    url = new URL(absolute);
  } catch {
    // Unparseable input is reported as "no id" rather than thrown.
    return undefined;
  }

  const pathMatch = url.pathname.match(/\/app\/user\/profile\/(\d+)/);
  if (pathMatch?.[1]) return pathMatch[1];

  // `||` rather than `??`: an empty `userid=` must not mask a populated `user_id`.
  return url.searchParams.get('userid') || url.searchParams.get('user_id') || undefined;
}
/**
 * Builds placeholder feed entries from the post links found in an HTML snapshot.
 *
 * Every `href="/app/bbs/link/<digits>"` attribute in `html` contributes one
 * entry; repeated ids are collapsed, keeping the first occurrence and its
 * position. Only `id` and `linkUrl` carry data — all other fields are set
 * to empty strings or zero.
 *
 * @param html - Raw HTML text to scan.
 * @returns One feed per distinct link id, in order of first appearance.
 */
export function parseFeedsFromHtmlSnapshot(html: string): Feed[] {
  // A Map iterates in insertion order, so it serves as both the dedupe set and the result list.
  const feedsById = new Map<string, Feed>();

  for (const [, path] of html.matchAll(/href="(\/app\/bbs\/link\/\d+)"/g)) {
    if (!path) continue;

    const linkId = extractLinkIdFromUrl(path);
    if (!linkId || feedsById.has(linkId)) continue;

    const placeholder: Feed = {
      id: linkId,
      title: '',
      description: '',
      coverUrl: '',
      likeCount: 0,
      commentCount: 0,
      user: { id: '', nickname: '', avatar: '' },
      linkUrl: `https://www.xiaoheihe.cn${path}`,
    };
    feedsById.set(linkId, placeholder);
  }

  return [...feedsById.values()];
}
/**
 * Picks the first argument that still has content after trimming.
 *
 * @param values - Candidates in priority order; `null` and `undefined` are skipped.
 * @returns The trimmed text of the first candidate that is not blank, or
 *   `''` when every candidate is missing or whitespace-only.
 */
export function firstNonEmpty(...values: Array<string | null | undefined>): string {
  for (let i = 0; i < values.length; i += 1) {
    const candidate = values[i];
    if (candidate == null) continue;

    const cleaned = candidate.trim();
    if (cleaned !== '') return cleaned;
  }
  return '';
}