Files
all-by-skill/skills/xiaohongshu-reply-notifications/scripts/run_reply_notifications.py
T

1476 lines
51 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""Run XiaoHongShu notification reply flow via shared Playwright CLI session."""
from __future__ import annotations
import argparse
import hashlib
import json
import os
import re
import subprocess
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Optional
class FlowError(RuntimeError):
    """Error type for a flow step that cannot continue.

    Used in this module when a subprocess command exits with a non-zero
    status or when Playwright output cannot be interpreted.
    """
# Regex fragments, searched case-insensitively, that mark command output as a
# network failure (DNS/connect/timeout errors or a mention of the npm registry).
NETWORK_SANDBOX_PATTERNS = (
    r"\bENOTFOUND\b",
    r"\bEAI_AGAIN\b",
    r"\bETIMEDOUT\b",
    r"\bECONNREFUSED\b",
    r"registry\.npmjs\.org",
)


def is_network_sandbox_error(output: str) -> bool:
    """Tell whether *output* matches any entry of NETWORK_SANDBOX_PATTERNS."""
    for fragment in NETWORK_SANDBOX_PATTERNS:
        if re.search(fragment, output, flags=re.IGNORECASE) is not None:
            return True
    return False
def run_command(
    cmd: list[str], *, capture_output: bool = True, check: bool = True
) -> subprocess.CompletedProcess[str]:
    """Execute *cmd* in text mode and hand back the completed process.

    Args:
        cmd: Argument vector passed to ``subprocess.run``.
        capture_output: When true, stdout is piped and stderr is merged into it.
        check: When true, a non-zero exit status raises ``FlowError``.

    Raises:
        FlowError: The command failed and ``check`` is true. Output that looks
            like a network failure gets an escalation hint in the message.
    """
    if capture_output:
        proc = subprocess.run(
            cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
        )
    else:
        proc = subprocess.run(cmd, text=True)

    if not check or proc.returncode == 0:
        return proc

    # Nothing was captured when output went straight to the parent's streams.
    output = (proc.stdout or "") if capture_output else ""
    rendered = " ".join(cmd)
    # Help operators quickly recover when Playwright CLI cannot reach npm
    # under sandboxed network rules.
    if is_network_sandbox_error(output):
        raise FlowError(
            "Playwright preflight failed due to network sandbox restrictions.\n"
            "Rerun the same command with escalation (sandbox_permissions=require_escalated).\n\n"
            f"Command: {rendered}\n{output}"
        )
    raise FlowError(f"Command failed ({proc.returncode}): {rendered}\n{output}")
def run_pw(pw_shared: Path, *args: str) -> str:
    """Run the executable at *pw_shared* with *args* and return its captured output.

    Failures propagate from ``run_command``. An empty string is returned when
    nothing was captured.
    """
    argv = [str(pw_shared)]
    argv.extend(args)
    completed = run_command(argv, capture_output=True)
    return completed.stdout if completed.stdout else ""
def ensure_pw_preflight(pw_shared: Path) -> None:
    """Check that *pw_shared* can be launched by running it with ``--help``.

    Any failure surfaces as the exception raised by ``run_command``.
    """
    preflight_cmd = [str(pw_shared), "--help"]
    run_command(preflight_cmd, capture_output=True, check=True)
def run_code_json(pw_shared: Path, function_source: str) -> dict[str, Any]:
    """Run *function_source* through ``run-code`` and decode its JSON result.

    Two output shapes are understood, tried in this order:

    1. A ``### Result`` section followed by another ``### `` heading, whose
       body (optionally wrapped in a code fence) is JSON.
    2. The last output line containing the ``__CODEX_JSON__`` marker, with
       JSON after the marker.

    A decoded value that is not a dict is wrapped as ``{"value": <value>}``.

    Raises:
        FlowError: The marker line holds invalid JSON, or neither shape is found.
    """

    def as_mapping(value: Any) -> dict[str, Any]:
        return value if isinstance(value, dict) else {"value": value}

    out = run_pw(pw_shared, "run-code", function_source)

    section = re.search(r"(?ms)^### Result\s*\n(.*?)\n### ", out)
    if section is not None:
        body = section.group(1).strip()
        if body.startswith("```"):
            fenced = body.splitlines()
            has_fence = (
                len(fenced) >= 3
                and fenced[0].startswith("```")
                and fenced[-1].startswith("```")
            )
            if has_fence:
                body = "\n".join(fenced[1:-1]).strip()
        try:
            return as_mapping(json.loads(body))
        except json.JSONDecodeError:
            # An unparseable Result section is not fatal: try the marker next.
            pass

    marker = "__CODEX_JSON__"
    tagged = next(
        (entry for entry in reversed(out.splitlines()) if marker in entry), None
    )
    if tagged is None:
        raise FlowError(
            "Playwright output did not contain expected JSON marker.\n" + out[-5000:]
        )

    raw = tagged[tagged.find(marker) + len(marker) :].strip()
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError as exc:  # pragma: no cover - runtime guard
        raise FlowError(
            f"Failed to parse JSON payload from Playwright output:\n{out}"
        ) from exc
    return as_mapping(parsed)
def now_utc_iso() -> str:
    """Return the current time as a timezone-aware UTC ISO-8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
def normalize_text(text: str) -> str:
    """Canonicalize comment/reply text for comparison and hashing.

    Steps, in order:
      1. Trim and collapse every whitespace run to a single space.
      2. Map fullwidth CJK punctuation to its ASCII counterpart.
      3. Drop a leading "回复 <name>:" prefix (name of 1-30 non-colon chars).

    Args:
        text: Raw text.

    Returns:
        The normalized text, stripped of surrounding whitespace.
    """
    # The search keys are written as escapes on purpose. With empty-string
    # keys, ``str.replace("", x)`` inserts ``x`` between every character and
    # corrupts all input.
    # NOTE(review): the key set is reconstructed from the ASCII targets
    # (, . ! ? :) -- confirm it matches the intended punctuation.
    fullwidth_to_ascii = str.maketrans(
        {
            "\uff0c": ",",  # fullwidth comma
            "\u3002": ".",  # ideographic full stop
            "\uff01": "!",  # fullwidth exclamation mark
            "\uff1f": "?",  # fullwidth question mark
            "\uff1a": ":",  # fullwidth colon
        }
    )
    value = re.sub(r"\s+", " ", text.strip())
    value = value.translate(fullwidth_to_ascii)
    # Colons are already ASCII here, so the prefix pattern only needs ":".
    value = re.sub(r"^回复\s*[^:]{1,30}\s*[:]\s*", "", value)
    return value.strip()
def reply_hash(reply_text: str) -> str:
    """Return the hex SHA-256 digest of the normalized, UTF-8 encoded reply text."""
    canonical = normalize_text(reply_text)
    digest = hashlib.sha256()
    digest.update(canonical.encode("utf-8"))
    return digest.hexdigest()
def parse_self_profile_id(profile_href: str) -> str:
    """Extract the id segment following ``/user/profile/`` from *profile_href*.

    The id ends at the next ``/``, ``?`` or ``#``. Returns ``""`` when the
    href has no such segment.
    """
    found = re.search(r"/user/profile/([^/?#]+)", profile_href)
    return found.group(1) if found else ""
def classify_comment_quality(comment_text: str) -> str:
    """Label a comment as ``"satisfied"`` or ``"not_satisfied"``.

    After normalization a comment is ``"not_satisfied"`` when it is empty,
    consists only of non-word characters/underscores, contains one of the
    listed promotion keywords, or contains a URL-like fragment.
    """
    cleaned = normalize_text(comment_text)
    rejected = (
        not cleaned
        or re.fullmatch(r"[\W_]+", cleaned) is not None
        or re.search(r"(加v|vx|私聊我|互赞|引流|广告)", cleaned, flags=re.IGNORECASE)
        is not None
        or re.search(r"(http://|https://|www\.)", cleaned, flags=re.IGNORECASE)
        is not None
    )
    return "not_satisfied" if rejected else "satisfied"
def draft_reply(
    *,
    target_comment: str,
    note_title: str,
    fixed_reply: str | None,
) -> tuple[str, str, str]:
    """Pick a canned reply for *target_comment* using keyword rules.

    Args:
        target_comment: Raw text of the comment being answered.
        note_title: Title of the note; only its truthiness is used, to choose
            between a cautious direct answer and a "message me" fallback.
        fixed_reply: When it contains non-whitespace text, it is returned
            stripped and the rules are skipped.

    Returns:
        ``(reply_text, level, rule)``. ``level`` is ``"high"``, ``"medium"`` or
        ``"low"`` (presumably a confidence label -- confirm with the caller).
        ``rule`` names the rule that produced the reply.
    """
    # A whitespace-only fixed reply is treated as "not provided" rather than
    # returning an empty reply.
    fixed = (fixed_reply or "").strip()
    if fixed:
        return fixed, "high", "fixed_reply"

    text = normalize_text(target_comment)
    lower = text.lower()

    # The fullwidth question mark and the particle below are written as
    # escapes. An empty-string literal here is a substring of every string, so
    # it would force has_question to True and make the feedback/default
    # branches unreachable.
    # NOTE(review): "\u5417" (the question particle "ma") is a reconstruction
    # of a key that was empty in the source -- confirm.
    question_keys = ("怎么", "如何", "可以", "能不能", "是否", "多少", "多久", "\u5417")
    has_question = (
        "?" in text
        or "\uff1f" in target_comment
        or any(key in text for key in question_keys)
    )

    if any(k in lower for k in ("business", "gpt", "chatgpt")) and any(
        k in text for k in ("额度", "配额", "套餐", "方案", "订阅")
    ):
        return "Business额度按当前套餐走,升级套餐额度会同步提高。", "high", "known_business_quota"

    if has_question:
        if len(text) <= 8:
            return "这个细节怕说不准,方便私信我,我详细说下。", "low", "question_too_short"
        # Match against the lowercased text so "Bug"/"BUG" are caught too.
        if any(k in lower for k in ("bug", "报错", "错误", "失败", "不能", "无效")):
            return "先看报错信息和步骤,一般定位后就能快速解决。", "medium", "diagnostic_hint"
        if note_title:
            return "这个要看具体场景,按你这条情况通常可以这样处理。", "medium", "cautious_direct"
        return "这个细节怕说不准,方便私信我,我详细说下。", "low", "missing_context"

    if any(k in text for k in ("谢谢", "有用", "喜欢", "学到了", "收藏")):
        return "谢谢你支持,我会继续更新更实用的内容。", "high", "positive_feedback"
    return "感谢留言,这条我后续会补充得更清楚。", "medium", "default"
def is_logged_in(pw_shared: Path) -> dict[str, Any]:
    """Open the notification page and report whether a session looks logged in.

    The page script navigates to ``/notification``, waits one second, then
    inspects the DOM.

    Returns:
        A dict with ``logged_in``, ``profile_href`` (first ``/user/profile/``
        link found via the profile nav entry, or ``""``), ``is_login_page``,
        ``has_mentions_tab`` and ``url``. ``logged_in`` is true when no login
        prompt is detected and either the mentions tab or a profile link exists.
    """
    # The nav label is compared against '\u6211' ("me"), written as a JS
    # escape. Comparing against an empty string would match every text-less
    # node, such as avatar links, and could return another user's profile href.
    # NOTE(review): the label is reconstructed from the wo* variable names --
    # confirm against the live page.
    return run_code_json(
        pw_shared,
        r"""
async (page) => {
await page.goto('https://www.xiaohongshu.com/notification', { waitUntil: 'domcontentloaded' });
await page.waitForTimeout(1000);
const data = await page.evaluate(() => {
const normalize = (v) => (v || '').replace(/\s+/g, ' ').trim();
const text = document.body?.innerText || '';
const isLoginPage =
/\/login/.test(location.pathname) ||
/手机号登录|获取验证码|扫码|登录后推荐更懂你的笔记|马上登录/.test(text);
const woNodes = Array.from(document.querySelectorAll('a,button,span,div'))
.filter((el) => normalize(el.textContent) === '\u6211')
.slice(0, 20);
const woProfileCandidates = [];
for (const node of woNodes) {
const anchor = node.closest('a[href]');
if (anchor) woProfileCandidates.push((anchor.getAttribute('href') || '').trim());
}
const woProfileHref = woProfileCandidates.find((href) => /\/user\/profile\//.test(href)) || '';
const hasMentionsTab = Array.from(document.querySelectorAll('a,button,div,span')).some((el) => {
const t = normalize(el.textContent);
return t === '评论和@' || t === '评论与@';
});
return {
logged_in: Boolean(!isLoginPage && (hasMentionsTab || woProfileHref)),
profile_href: woProfileHref,
is_login_page: isLoginPage,
has_mentions_tab: hasMentionsTab,
};
});
return { ...data, url: page.url() };
}
""".strip(),
    )
def open_comment_notifications(pw_shared: Path) -> dict[str, Any]:
    """Open the notification page and try to select the comments/mentions tab.

    The page script navigates to ``/notification`` and then tries to click an
    element named 评论和@ / 评论与@. It tries a ``tab`` role first, then a
    ``button`` role, then a plain text match, and stops at the first click
    that succeeds. Click errors are swallowed.

    Returns:
        A dict with ``ok``, ``url`` and ``requires_login``. ``ok`` is true when
        a click succeeded or the URL still contains ``/notification``, and no
        login prompt was detected.
    """
    return run_code_json(
        pw_shared,
        r"""
async (page) => {
await page.goto('https://www.xiaohongshu.com/notification', { waitUntil: 'domcontentloaded' });
await page.waitForTimeout(1000);
const tryClick = async (locator) => {
try {
if (await locator.count()) {
await locator.first().click({ timeout: 2000 });
return true;
}
} catch (_) {}
return false;
};
let clicked = false;
clicked = clicked || await tryClick(page.getByRole('tab', { name: /评论和@|评论与@/i }));
clicked = clicked || await tryClick(page.getByRole('button', { name: /评论和@|评论与@/i }));
clicked = clicked || await tryClick(page.getByText(/评论和@|评论与@/i));
await page.waitForTimeout(800);
const pageState = await page.evaluate(() => {
const text = document.body?.innerText || '';
const requiresLogin =
/\/login/.test(location.pathname) ||
/手机号登录|获取验证码|扫码|登录后推荐更懂你的笔记|马上登录/.test(text);
return { requires_login: requiresLogin };
});
return {
ok: Boolean((clicked || /\/notification/.test(page.url())) && !pageState.requires_login),
url: page.url(),
requires_login: pageState.requires_login,
};
}
""".strip(),
    )
def collect_notification_cards(
    pw_shared: Path, *, max_scan_cards: int, max_scroll_rounds: int
) -> list[dict[str, Any]]:
    """Scrape comment/reply/mention notification cards from the current page.

    The page script does not navigate; it works on whatever page is open
    (presumably the notification list -- confirm with the caller). Each round
    it scans visible ``div``/``li``/``article``/``section`` elements whose
    normalized text is 12-260 characters long and matches one of the
    notification phrases, parses notifier, snippet and timestamp, and skips
    duplicates by a composite key. It then scrolls the tallest scrollable
    element, or the window if none is found.

    Scanning stops when ``max_scan_cards`` cards are collected, after
    ``max_scroll_rounds`` rounds, or after two consecutive rounds that add no
    new card.

    Args:
        pw_shared: Path of the Playwright CLI wrapper executable.
        max_scan_cards: Upper bound on collected cards.
        max_scroll_rounds: Upper bound on scan-and-scroll rounds.

    Returns:
        Card dicts with keys ``note_url``, ``notifier``, ``event_type``,
        ``notifier_snippet``, ``card_text`` and ``timestamp_text``. Returns an
        empty list when the result carries no ``cards`` list.
    """
    options_json = json.dumps(
        {
            "maxScanCards": max_scan_cards,
            "maxScrollRounds": max_scroll_rounds,
        },
        ensure_ascii=False,
    )
    # The options are spliced into the script as a JSON literal.
    function_source = (
        r"""
async (page) => {
const options = __OPTIONS__;
const data = await page.evaluate(async (options) => {
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
const normalize = (v) => (v || '').replace(/\s+/g, ' ').trim();
const eventRe = /评论了你的笔记|回复了你的评论|在评论中@了你|你的好友评论了你的笔记|你的好友回复了你的评论|你的好友在评论中@了你/;
const eventTypeFromText = (text) => {
if (/回复了你的评论|你的好友回复了你的评论/.test(text)) return '回复了你的评论';
if (/在评论中@了你|你的好友在评论中@了你|@了你/.test(text)) return '在评论中@了你';
if (/评论了你的笔记|你的好友评论了你的笔记/.test(text)) return '评论了你的笔记';
return '';
};
const timestampFromText = (text) => {
const m = text.match(
/(刚刚|\d+分钟前|\d+小时前|昨天(?:\s*\d{1,2}:\d{2})?|前天(?:\s*\d{1,2}:\d{2})?|\d{1,2}[-\/]\d{1,2}(?:\s*\d{1,2}:\d{2})?)/
);
return m ? normalize(m[1]) : '';
};
const toAbsUrl = (href) => {
if (!href) return '';
try {
return new URL(href, location.origin).toString();
} catch {
return '';
}
};
const getScrollContainer = () => {
const candidates = Array.from(document.querySelectorAll('*')).filter((el) => {
const style = window.getComputedStyle(el);
const scrollable = /(auto|scroll)/.test(style.overflowY || '');
return scrollable && el.scrollHeight > el.clientHeight + 40;
});
candidates.sort((a, b) => b.clientHeight - a.clientHeight);
return candidates[0] || null;
};
const isVisible = (el) => {
if (!el) return false;
const rect = el.getBoundingClientRect();
const style = window.getComputedStyle(el);
return (
rect.width > 20 &&
rect.height > 14 &&
style.display !== 'none' &&
style.visibility !== 'hidden' &&
style.opacity !== '0'
);
};
const parseCard = (text, eventType) => {
const cleaned = normalize(text);
const idx = cleaned.indexOf(eventType);
let notifier = '';
if (idx > 0) {
notifier = normalize(cleaned.slice(0, idx).replace(/你的好友/g, ''));
}
notifier = notifier.slice(-20);
let snippet = cleaned;
if (notifier) snippet = snippet.replace(notifier, '');
snippet = snippet.replace(eventType, '').replace(/你的好友/g, '');
const ts = timestampFromText(cleaned);
if (ts) snippet = snippet.replace(ts, '');
snippet = snippet
.replace(/回复\s*取消评论将会清空已经输入的内容确认返回/g, '')
.replace(/回复\s*取消评论/g, '')
.replace(/回到顶部/g, '');
snippet = normalize(snippet).slice(0, 120);
return {
notifier,
snippet,
timestamp: ts,
cardText: cleaned.slice(0, 260),
};
};
const cards = [];
const seen = new Set();
let unchangedRounds = 0;
let lastCount = 0;
for (let round = 0; round < options.maxScrollRounds; round += 1) {
const roots = Array.from(document.querySelectorAll('div,li,article,section'));
for (const root of roots) {
if (!isVisible(root)) continue;
const text = normalize(root.innerText || root.textContent || '');
if (!text || text.length < 12 || text.length > 260) continue;
if (!eventRe.test(text)) continue;
if (/评论和@|赞和收藏|新增关注/.test(text) && text.length < 30) continue;
if (/温馨提示|广告屏蔽|沪ICP备/.test(text)) continue;
const eventType = eventTypeFromText(text);
if (!eventType) continue;
const parsed = parseCard(text, eventType);
if (!parsed.notifier && !parsed.snippet) continue;
const linkEl = root.querySelector('a[href*="/explore/"],a[href*="/discovery/item/"],a[href*="/note/"]');
const noteUrl = toAbsUrl(linkEl?.getAttribute('href') || '');
const key = [
parsed.notifier,
eventType,
parsed.timestamp,
parsed.snippet.slice(0, 60),
noteUrl,
].join('|');
if (seen.has(key)) continue;
seen.add(key);
cards.push({
note_url: noteUrl,
notifier: parsed.notifier,
event_type: eventType,
notifier_snippet: parsed.snippet,
card_text: parsed.cardText,
timestamp_text: parsed.timestamp,
});
if (cards.length >= options.maxScanCards) break;
}
if (cards.length >= options.maxScanCards) break;
const scroller = getScrollContainer();
if (scroller) {
scroller.scrollTop += Math.max(320, Math.floor(scroller.clientHeight * 0.92));
} else {
window.scrollBy(0, Math.max(520, Math.floor(window.innerHeight * 0.92)));
}
await sleep(700);
if (cards.length === lastCount) {
unchangedRounds += 1;
} else {
unchangedRounds = 0;
lastCount = cards.length;
}
if (unchangedRounds >= 2) break;
}
return {
cards,
scanned: cards.length,
};
}, options);
return data;
}
""".strip().replace("__OPTIONS__", options_json)
    )
    result = run_code_json(
        pw_shared,
        function_source,
    )
    cards = result.get("cards")
    # Keep only dict entries so callers can rely on mapping access.
    if isinstance(cards, list):
        return [c for c in cards if isinstance(c, dict)]
    return []
def open_note_detail_from_card(
    pw_shared: Path,
    *,
    card: dict[str, Any],
) -> dict[str, Any]:
    """Re-find a notification card on the page and click through to its note.

    The page script:
      1. Loads ``/notification`` and clicks the 评论和@ / 评论与@ text if present.
      2. Scores visible candidate elements against the card: +5 event type,
         +4 notifier, +1 timestamp, +1 per matching snippet token (at most 4
         tokens), +1 if the text contains 回复. The best-scoring element is
         tagged with ``data-codex-card-target="1"``.
      3. Clicks a comment-content descendant of that element (never the 回复
         action) and polls for up to 3600 ms. The poll ends when the URL looks
         like a note detail page, looks like a 404 page, or a visible
         "content deleted" message appears.
      4. If the page did not open, clicks a note link inside the card and
         polls again.

    Args:
        pw_shared: Path of the Playwright CLI wrapper executable.
        card: Card dict; the script reads ``event_type``, ``notifier``,
            ``notifier_snippet`` and ``timestamp_text``.

    Returns:
        A dict with ``opened`` and, depending on the path taken, ``reason``
        (``card_not_matched``, ``target_missing_after_match``,
        ``comment_click_target_not_found``, ``content_deleted``), ``url``,
        ``via`` and ``match``.
    """
    payload_json = json.dumps(card, ensure_ascii=False)
    # The card is spliced into the script as a JSON literal.
    function_source = (
        r"""
async (page) => {
const card = __CARD__;
const normalize = (v) => (v || '').replace(/\s+/g, ' ').trim();
const detailUrlRe = /\/explore\/|\/discovery\/item\/|\/note\//;
await page.goto('https://www.xiaohongshu.com/notification', { waitUntil: 'domcontentloaded' });
await page.waitForTimeout(1000);
const tab = page.getByText(/评论和@|评论与@/i).first();
if (await tab.count()) {
await tab.click().catch(() => null);
await page.waitForTimeout(400);
}
const eventType = normalize(card.event_type || '');
const notifier = normalize(card.notifier || '');
const snippet = normalize(card.notifier_snippet || '');
const timestamp = normalize(card.timestamp_text || '');
const eventRe = /评论了你的笔记|回复了你的评论|在评论中@了你|你的好友评论了你的笔记|你的好友回复了你的评论|你的好友在评论中@了你/;
const matchInfo = await page.evaluate(({ eventType, notifier, snippet, timestamp }) => {
const normalize = (v) => (v || '').replace(/\s+/g, ' ').trim();
const eventRe =
/评论了你的笔记|回复了你的评论|在评论中@了你|你的好友评论了你的笔记|你的好友回复了你的评论|你的好友在评论中@了你/;
const visible = (el) => {
const rect = el.getBoundingClientRect();
const style = window.getComputedStyle(el);
return (
rect.width > 20 &&
rect.height > 14 &&
style.display !== 'none' &&
style.visibility !== 'hidden' &&
style.opacity !== '0'
);
};
document.querySelectorAll('[data-codex-card-target="1"]').forEach((el) => {
el.removeAttribute('data-codex-card-target');
});
const roots = Array.from(document.querySelectorAll('div,li,article,section'));
let best = null;
let bestScore = -999;
let bestText = '';
for (const el of roots) {
if (!visible(el)) continue;
const text = normalize(el.innerText || el.textContent || '');
if (!text || text.length < 12 || text.length > 260) continue;
if (!eventRe.test(text)) continue;
let score = 0;
if (eventType && text.includes(eventType)) score += 5;
if (notifier && text.includes(notifier)) score += 4;
if (timestamp && text.includes(timestamp)) score += 1;
if (snippet) {
const tokens = snippet.split(' ').filter((t) => t.length >= 2).slice(0, 4);
for (const token of tokens) {
if (text.includes(token)) score += 1;
}
}
if (/回复/.test(text)) score += 1;
if (score > bestScore) {
best = el;
bestScore = score;
bestText = text.slice(0, 220);
}
}
if (!best) return { found: false };
best.setAttribute('data-codex-card-target', '1');
return { found: true, score: bestScore, text: bestText };
}, { eventType, notifier, snippet, timestamp });
if (!matchInfo?.found) {
return { opened: false, reason: 'card_not_matched' };
}
const target = page.locator('[data-codex-card-target="1"]').first();
if (!(await target.count())) {
return { opened: false, reason: 'target_missing_after_match' };
}
await target.scrollIntoViewIfNeeded().catch(() => null);
// Always click comment content area, never the "回复" action.
let clickTarget = target.locator('.main,.item-main,.content-main,.comment-content,[class*="comment-content"]').first();
if (!(await clickTarget.count())) {
clickTarget = target.locator('.content,.info,.interaction-hint,[class*="info"]').first();
}
if (!(await clickTarget.count())) {
return {
opened: false,
reason: 'comment_click_target_not_found',
url: page.url(),
via: 'card_match',
match: matchInfo,
};
}
await clickTarget.click({ timeout: 3000 }).catch(() => null);
const waitOutcome = async (timeoutMs) => {
const start = Date.now();
const deletedReSource = '该内容已被删除|内容已被删除|笔记已删除|当前笔记暂时无法浏览';
while (Date.now() - start < timeoutMs) {
const cur = page.url();
if (detailUrlRe.test(cur)) return { state: 'opened', url: cur };
if (/\/404(?:[/?#]|$)/.test(cur)) return { state: 'deleted', url: cur };
const deletedByToast = await page
.evaluate((reSource) => {
const re = new RegExp(reSource, 'i');
const visible = (el) => {
const rect = el.getBoundingClientRect();
const style = window.getComputedStyle(el);
return (
rect.width > 10 &&
rect.height > 10 &&
style.display !== 'none' &&
style.visibility !== 'hidden' &&
style.opacity !== '0'
);
};
const nodes = Array.from(document.querySelectorAll('div,span,p,section,article'));
return nodes.some((el) => visible(el) && re.test((el.textContent || '').trim()));
}, deletedReSource)
.catch(() => false);
if (deletedByToast) return { state: 'deleted', url: cur };
await page.waitForTimeout(180);
}
const finalUrl = page.url();
if (/\/404(?:[/?#]|$)/.test(finalUrl)) return { state: 'deleted', url: finalUrl };
if (detailUrlRe.test(finalUrl)) return { state: 'opened', url: finalUrl };
return { state: 'stayed', url: finalUrl };
};
let outcome = await waitOutcome(3600);
let after = outcome.url;
if (outcome.state === 'deleted') {
return {
opened: false,
reason: 'content_deleted',
url: after,
via: 'card_match',
match: matchInfo,
};
}
if (outcome.state !== 'opened') {
const anchor = target
.locator('a[href*="/explore/"],a[href*="/discovery/item/"],a[href*="/note/"]')
.first();
if (await anchor.count()) {
await anchor.click().catch(() => null);
outcome = await waitOutcome(3600);
after = outcome.url;
if (outcome.state === 'deleted') {
return {
opened: false,
reason: 'content_deleted',
url: after,
via: 'card_match',
match: matchInfo,
};
}
}
}
return {
opened: outcome.state === 'opened' || detailUrlRe.test(after),
url: after,
via: 'card_match',
match: matchInfo,
};
}
""".strip().replace("__CARD__", payload_json)
    )
    result = run_code_json(pw_shared, function_source)
    # Defensive guard on the result type.
    if not isinstance(result, dict):
        return {"opened": False, "reason": "invalid_open_result"}
    return result
def collect_note_detail_context(
    pw_shared: Path,
    *,
    card: dict[str, Any],
    self_profile_id: str,
    reuse_current_page: bool = False,
) -> dict[str, Any]:
    """Read note metadata and locate the comment a notification card refers to.

    The page script returns early with ``reason: 'missing_note_url'`` when the
    card has no ``note_url``. Otherwise it navigates to ``card.note_url``,
    unless ``reuse_current_page`` is set and the current URL already looks
    like a note detail page. It then extracts title, description, author and
    hashtags, and collects every element whose id starts with ``comment-``.

    Each comment is scored against the card: +3 when the author contains the
    notifier, +1 per snippet token found in the content (at most 6 tokens),
    +1 when both the event type and the content contain ``@``, and -1 for
    empty content. The highest-scoring comment is the target.

    Args:
        pw_shared: Path of the Playwright CLI wrapper executable.
        card: Card dict; ``note_url``, ``notifier``, ``notifier_snippet`` and
            ``event_type`` are read.
        self_profile_id: Profile id used to flag comments that contain a link
            to the user's own profile (``replied_by_me``).
        reuse_current_page: Skip navigation when already on a detail page.

    Returns:
        A dict with ``note_url``, ``note_id``, ``note_title``, ``note_desc``,
        ``note_author``, ``hashtags``, ``card``, ``comment_count``,
        ``target_found``, ``target`` and ``best_score``. On the early-return
        path it holds only ``target_found`` and ``reason``.

    Raises:
        FlowError: The Playwright result is not a dict.
    """
    payload_json = json.dumps(
        {
            "card": card,
            "selfProfileId": self_profile_id,
            "reuseCurrentPage": reuse_current_page,
        },
        ensure_ascii=False,
    )
    # The payload is spliced into the script as a JSON literal.
    function_source = (
        r"""
async (page) => {
const input = __PAYLOAD__;
const card = input.card || {};
const selfProfileId = input.selfProfileId || '';
const reuseCurrentPage = Boolean(input.reuseCurrentPage);
const detailUrlRe = /\/explore\/|\/discovery\/item\/|\/note\//;
if (!card.note_url) {
return { target_found: false, reason: 'missing_note_url' };
}
if (!reuseCurrentPage) {
await page.goto(card.note_url, { waitUntil: 'domcontentloaded' });
await page.waitForTimeout(1200);
} else {
const cur = page.url();
if (!detailUrlRe.test(cur)) {
await page.goto(card.note_url, { waitUntil: 'domcontentloaded' });
await page.waitForTimeout(1200);
}
}
const pageUrl = page.url();
const data = await page.evaluate(
({ card, selfProfileId, pageUrl }) => {
const normalize = (v) => (v || '').replace(/\s+/g, ' ').trim();
const noteIdMatch = (pageUrl.match(/\/explore\/([^/?#]+)/) || [])[1] || '';
const titleEl =
document.querySelector('h1') ||
document.querySelector('[class*="title"]') ||
document.querySelector('meta[property="og:title"]');
const noteTitle = normalize(titleEl?.textContent || titleEl?.getAttribute?.('content') || '');
const noteDescEl =
document.querySelector('[class*="desc"]') ||
document.querySelector('[class*="content"]') ||
document.querySelector('meta[name="description"]');
const noteDesc = normalize(noteDescEl?.textContent || noteDescEl?.getAttribute?.('content') || '');
const noteAuthorEl =
document.querySelector('[class*="author"]') ||
document.querySelector('a[href*="/user/profile/"] span') ||
document.querySelector('a[href*="/user/profile/"]');
const noteAuthor = normalize(noteAuthorEl?.textContent || '');
const hashtags = Array.from(
new Set(((noteTitle + ' ' + noteDesc).match(/#[^\s#]+/g) || []).slice(0, 12))
);
const commentNodes = Array.from(document.querySelectorAll('[id^="comment-"], .comment-item'));
const comments = [];
for (const node of commentNodes) {
const domId = node?.id && String(node.id).startsWith('comment-') ? String(node.id) : '';
if (!domId) continue;
const parentCommentId = domId.replace(/^comment-/, '');
if (!parentCommentId) continue;
const authorEl =
node.querySelector('.name') ||
node.querySelector('.author') ||
node.querySelector('[class*="author"]') ||
node.querySelector('a[href*="/user/profile/"]');
const author = normalize(authorEl?.textContent || '');
const contentEl =
node.querySelector('.content') ||
node.querySelector('.desc') ||
node.querySelector('[class*="content"]') ||
node.querySelector('[class*="desc"]');
const content = normalize(contentEl?.textContent || '');
const timeEl = node.querySelector('.time') || node.querySelector('[class*="time"]');
const timestampText = normalize(timeEl?.textContent || '');
const profileLinks = Array.from(node.querySelectorAll('a[href*="/user/profile/"]'));
const repliedByMe = Boolean(
selfProfileId &&
profileLinks.some((a) => (a.getAttribute('href') || '').includes(`/user/profile/${selfProfileId}`))
);
comments.push({
dom_id: domId,
parent_comment_id: parentCommentId,
author,
content,
timestamp_text: timestampText,
replied_by_me: repliedByMe,
});
}
const cardSnippet = normalize(card?.notifier_snippet || '');
const snippetTokens = cardSnippet.split(' ').filter((t) => t.length >= 2).slice(0, 6);
let best = null;
let bestScore = -999;
for (const c of comments) {
let score = 0;
if (card?.notifier && c.author && c.author.includes(card.notifier)) score += 3;
for (const token of snippetTokens) {
if (token && c.content && c.content.includes(token)) score += 1;
}
if (card?.event_type && String(card.event_type).includes('@') && c.content.includes('@')) score += 1;
if (!c.content) score -= 1;
if (score > bestScore) {
bestScore = score;
best = c;
}
}
if (!best && comments.length > 0) best = comments[0];
return {
note_url: pageUrl,
note_id: noteIdMatch,
note_title: noteTitle,
note_desc: noteDesc,
note_author: noteAuthor,
hashtags,
card,
comment_count: comments.length,
target_found: Boolean(best),
target: best,
best_score: bestScore,
};
},
{ card, selfProfileId, pageUrl }
);
return data;
}
""".strip().replace("__PAYLOAD__", payload_json)
    )
    result = run_code_json(
        pw_shared,
        function_source,
    )
    if not isinstance(result, dict):
        raise FlowError("Invalid detail context response from Playwright.")
    return result
def check_reply_state_for_target(
    pw_shared: Path,
    *,
    parent_comment_id: str,
    self_profile_id: str,
    max_expand_rounds: int,
    max_scan_replies: int,
    max_expand_seconds: int,
) -> dict[str, Any]:
    """Check on the open page whether the user has replied under a comment.

    The page script works on the element ``#comment-<parent_comment_id>``.
    Each round it scans up to ``max_scan_replies`` visible nested comment
    elements for a link to ``/user/profile/<self_profile_id>``. If none is
    found and an expand / "more replies" control is visible, it clicks the
    first such control, sleeps 300 ms and repeats. The loop stops after
    ``max_expand_rounds`` rounds or ``max_expand_seconds`` seconds.

    Args:
        pw_shared: Path of the Playwright CLI wrapper executable.
        parent_comment_id: Comment id without the ``comment-`` prefix.
        self_profile_id: The user's own profile id; when empty the script
            never reports a reply as found.
        max_expand_rounds: Maximum scan/expand rounds.
        max_scan_replies: Maximum nested replies inspected per scan.
        max_expand_seconds: Wall-clock budget for the loop.

    Returns:
        A dict whose ``state`` is coerced to one of ``already_replied``,
        ``not_replied`` or ``unknown``. The script's result also carries
        ``reason``, ``rounds``, ``expand_clicks`` and ``reply_nodes_scanned``.
    """
    # Python values are embedded via json.dumps / int(); literal JS braces are
    # doubled because this is an f-string.
    result = run_code_json(
        pw_shared,
        f"""
async (page) => {{
const parentCommentId = {json.dumps(parent_comment_id, ensure_ascii=False)};
const selfProfileId = {json.dumps(self_profile_id, ensure_ascii=False)};
const maxExpandRounds = {int(max_expand_rounds)};
const maxScanReplies = {int(max_scan_replies)};
const maxExpandMs = {int(max_expand_seconds)} * 1000;
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
const rootId = `comment-${{parentCommentId}}`;
const scanState = async () => {{
return await page.evaluate(
({{ rootId, selfProfileId, maxScanReplies }}) => {{
const normalize = (v) => (v || '').replace(/\\s+/g, ' ').trim();
const isVisible = (el) => {{
if (!el) return false;
const rect = el.getBoundingClientRect();
const style = window.getComputedStyle(el);
return (
rect.width > 4 &&
rect.height > 4 &&
style.display !== 'none' &&
style.visibility !== 'hidden' &&
style.opacity !== '0'
);
}};
const hasMyProfileAnchor = (el) => {{
if (!el || !selfProfileId) return false;
const links = Array.from(el.querySelectorAll('a[href*="/user/profile/"]'));
return links.some((a) =>
(a.getAttribute('href') || '').includes(`/user/profile/${{selfProfileId}}`)
);
}};
const root = document.querySelector(`#${{rootId}}`);
if (!root) {{
return {{
root_found: false,
my_reply_found: false,
expandable_found: false,
reply_nodes_scanned: 0,
}};
}}
const nestedCommentNodes = Array.from(root.querySelectorAll('[id^="comment-"]'))
.filter((el) => (el.id || '') !== rootId)
.filter((el) => isVisible(el))
.slice(0, maxScanReplies);
let myReplyFound = false;
for (const node of nestedCommentNodes) {{
if (hasMyProfileAnchor(node)) {{
myReplyFound = true;
break;
}}
}}
const expandControls = Array.from(root.querySelectorAll('button,span,div,a'))
.filter((el) => isVisible(el))
.filter((el) => {{
const t = normalize(el.textContent || '');
if (!t || t.length > 30) return false;
if (/收起|已全部|没有更多|暂无更多/.test(t)) return false;
return /展开|更多|查看.*回复|全部回复|共\\d+条回复|显示更多|加载更多/.test(t);
}});
return {{
root_found: true,
my_reply_found: myReplyFound,
expandable_found: expandControls.length > 0,
reply_nodes_scanned: nestedCommentNodes.length,
}};
}},
{{ rootId, selfProfileId, maxScanReplies }}
);
}};
const clickExpand = async () => {{
return await page.evaluate(({{ rootId }}) => {{
const normalize = (v) => (v || '').replace(/\\s+/g, ' ').trim();
const isVisible = (el) => {{
if (!el) return false;
const rect = el.getBoundingClientRect();
const style = window.getComputedStyle(el);
return (
rect.width > 4 &&
rect.height > 4 &&
style.display !== 'none' &&
style.visibility !== 'hidden' &&
style.opacity !== '0'
);
}};
const root = document.querySelector(`#${{rootId}}`);
if (!root) return false;
const controls = Array.from(root.querySelectorAll('button,span,div,a'))
.filter((el) => isVisible(el))
.filter((el) => {{
const t = normalize(el.textContent || '');
if (!t || t.length > 30) return false;
if (/收起|已全部|没有更多|暂无更多/.test(t)) return false;
return /展开|更多|查看.*回复|全部回复|共\\d+条回复|显示更多|加载更多/.test(t);
}});
if (!controls.length) return false;
controls[0].click();
return true;
}}, {{ rootId }});
}};
const startedAt = Date.now();
let rounds = 0;
let expandClicks = 0;
let maxSeenReplyNodes = 0;
let sawExpandable = false;
while (rounds < maxExpandRounds && Date.now() - startedAt <= maxExpandMs) {{
rounds += 1;
const state = await scanState();
maxSeenReplyNodes = Math.max(maxSeenReplyNodes, state.reply_nodes_scanned || 0);
if (!state.root_found) {{
return {{
state: 'unknown',
reason: 'target_not_found',
rounds,
expand_clicks: expandClicks,
reply_nodes_scanned: maxSeenReplyNodes,
}};
}}
if (state.my_reply_found) {{
return {{
state: 'already_replied',
reason: 'my_reply_visible',
rounds,
expand_clicks: expandClicks,
reply_nodes_scanned: maxSeenReplyNodes,
}};
}}
if (!state.expandable_found) {{
return {{
state: 'not_replied',
reason: maxSeenReplyNodes > 0 ? 'visible_replies_no_mine' : 'no_replies_visible',
rounds,
expand_clicks: expandClicks,
reply_nodes_scanned: maxSeenReplyNodes,
}};
}}
sawExpandable = true;
const clicked = await clickExpand();
if (!clicked) {{
return {{
state: 'unknown',
reason: 'expand_click_failed',
rounds,
expand_clicks: expandClicks,
reply_nodes_scanned: maxSeenReplyNodes,
}};
}}
expandClicks += 1;
await sleep(300);
}}
if (sawExpandable) {{
return {{
state: 'unknown',
reason: 'expand_limit_reached',
rounds,
expand_clicks: expandClicks,
reply_nodes_scanned: maxSeenReplyNodes,
}};
}}
return {{
state: 'not_replied',
reason: 'default_not_replied',
rounds,
expand_clicks: expandClicks,
reply_nodes_scanned: maxSeenReplyNodes,
}};
}}
""".strip(),
    )
    if not isinstance(result, dict):
        return {"state": "unknown", "reason": "invalid_reply_state_result"}
    # Coerce anything outside the three known states to "unknown".
    state = str(result.get("state", "unknown")).strip() or "unknown"
    if state not in {"already_replied", "not_replied", "unknown"}:
        state = "unknown"
    result["state"] = state
    return result
def like_target_comment(pw_shared: Path, parent_comment_id: str) -> str:
    """Click the like control of ``#comment-<parent_comment_id>`` on the open page.

    The page script looks for a button named 赞 / like inside the comment. It
    treats the comment as already liked when the button text, CSS class or
    ``aria-pressed`` attribute says so. Otherwise it clicks the button with a
    2 s timeout.

    Returns:
        The ``status`` from the script: ``'liked'``, ``'already_liked'`` or
        ``'like_failed'``. ``'like_failed'`` is also returned when the result
        has no status.
    """
    # Literal JS braces are doubled because this is an f-string.
    result = run_code_json(
        pw_shared,
        f"""
async (page) => {{
const parentCommentId = {json.dumps(parent_comment_id, ensure_ascii=False)};
const domId = `#comment-${{parentCommentId}}`;
const root = page.locator(domId).first();
if (!(await root.count())) return {{ status: 'like_failed', reason: 'target_not_found' }};
let likeBtn = root.getByRole('button', {{ name: /赞|like/i }}).first();
if (!(await likeBtn.count())) {{
likeBtn = root.locator('button,[role="button"]').filter({{ hasText: /赞|like/i }}).first();
}}
if (!(await likeBtn.count())) return {{ status: 'like_failed', reason: 'like_button_not_found' }};
const text = ((await likeBtn.textContent()) || '').replace(/\\s+/g, ' ').trim();
const cls = (await likeBtn.getAttribute('class')) || '';
const pressed = (await likeBtn.getAttribute('aria-pressed')) || '';
const alreadyLiked =
/已赞|取消赞|liked/i.test(text) ||
/active|liked|is-liked/i.test(cls) ||
pressed === 'true';
if (alreadyLiked) return {{ status: 'already_liked' }};
try {{
await likeBtn.click({{ timeout: 2000 }});
await page.waitForTimeout(500);
return {{ status: 'liked' }};
}} catch (_) {{
return {{ status: 'like_failed', reason: 'click_failed' }};
}}
}}
""".strip(),
    )
    return str(result.get("status", "like_failed"))
def send_reply_for_comment(
    pw_shared: Path, *, parent_comment_id: str, reply_text: str, mode: str
) -> dict[str, Any]:
    """Type a reply under ``#comment-<parent_comment_id>`` and send it in live mode.

    The page script clicks the comment's 回复 control and picks the last
    visible editor from a list of candidate selectors. It fills the editor
    with ``reply_text``: ``fill`` for textarea/input elements, select-all plus
    keyboard typing otherwise.

    When ``mode`` is not ``'live'`` the script returns ``status: 'planned'``.
    At that point the text has already been typed into the editor; it is just
    not sent. In live mode the script clicks the last 发送 button, waits one
    second and checks for a visible success message.

    Args:
        pw_shared: Path of the Playwright CLI wrapper executable.
        parent_comment_id: Comment id without the ``comment-`` prefix.
        reply_text: Text to enter.
        mode: ``'live'`` sends; any other value stops after typing.

    Returns:
        A dict with ``ok`` and ``status`` (``failed``, ``planned``,
        ``replied`` or ``sent_unknown``). It also has ``reason`` on failure
        and ``verified`` after a live send.
    """
    # Python values are embedded via json.dumps; literal JS braces are doubled
    # because this is an f-string.
    result = run_code_json(
        pw_shared,
        f"""
async (page) => {{
const parentCommentId = {json.dumps(parent_comment_id, ensure_ascii=False)};
const replyText = {json.dumps(reply_text, ensure_ascii=False)};
const mode = {json.dumps(mode, ensure_ascii=False)};
const root = page.locator(`#comment-${{parentCommentId}}`).first();
if (!(await root.count())) return {{ ok: false, status: 'failed', reason: 'target_not_found' }};
let replyButton = root.getByRole('button', {{ name: /回复/i }}).first();
if (!(await replyButton.count())) {{
replyButton = root.locator('button,[role="button"],span,div').filter({{ hasText: /回复/ }}).first();
}}
if (!(await replyButton.count())) {{
return {{ ok: false, status: 'failed', reason: 'reply_button_not_found' }};
}}
try {{
await replyButton.click({{ timeout: 2000 }});
}} catch (_) {{
return {{ ok: false, status: 'failed', reason: 'reply_click_failed' }};
}}
await page.waitForTimeout(500);
const pickVisibleEditor = async () => {{
const selectors = [
'textarea',
'.engage-bar [contenteditable="true"]',
'.engage-bar .content-input',
'[contenteditable="true"]',
'div[role="textbox"]',
'.content-input',
'.input-box',
];
for (const sel of selectors) {{
const loc = page.locator(sel);
const count = await loc.count();
for (let i = count - 1; i >= 0; i -= 1) {{
const node = loc.nth(i);
const visible = await node.isVisible().catch(() => false);
if (visible) return node;
}}
}}
return null;
}};
const editor = await pickVisibleEditor();
if (!editor) {{
return {{ ok: false, status: 'failed', reason: 'editor_not_found' }};
}}
const tagName = await editor.evaluate((el) => (el.tagName || '').toLowerCase()).catch(() => '');
if (tagName === 'textarea' || tagName === 'input') {{
await editor.click().catch(() => null);
await editor.fill(replyText).catch(() => null);
}} else {{
await editor.click().catch(() => null);
await page.keyboard.press('Meta+A').catch(() => null);
await page.keyboard.press('Control+A').catch(() => null);
await page.keyboard.type(replyText, {{ delay: 15 }}).catch(() => null);
const currentText = await editor.evaluate((el) => (el.textContent || '').trim()).catch(() => '');
if (!currentText) {{
await page.keyboard.insertText(replyText).catch(() => null);
}}
}}
if (mode !== 'live') {{
return {{ ok: true, status: 'planned' }};
}}
const sendBtn = page.getByRole('button', {{ name: /发送/i }}).last();
if (!(await sendBtn.count())) {{
return {{ ok: false, status: 'failed', reason: 'send_button_not_found' }};
}}
await sendBtn.click();
await page.waitForTimeout(1000);
const successToast = page.getByText(/评论成功|回复成功|发送成功/i).first();
const verified = await successToast.isVisible().catch(() => false);
return {{
ok: true,
status: verified ? 'replied' : 'sent_unknown',
verified,
}};
}}
""".strip(),
    )
    if not isinstance(result, dict):
        return {"ok": False, "status": "failed", "reason": "invalid_send_result"}
    return result
def parse_args() -> argparse.Namespace:
    """Build the command-line parser for the reply flow and parse ``sys.argv``.

    Returns:
        The populated ``argparse.Namespace``. Every numeric limit has a
        default, ``--mode`` defaults to ``safe`` and ``--report`` defaults to
        an empty string.
    """
    cli = argparse.ArgumentParser(
        description="Reply XiaoHongShu 通知-评论和@ items with online reply-state checks."
    )

    def add_int(flag: str, default: int, help_text: str) -> None:
        # All numeric limits share the same shape: an int with a default.
        cli.add_argument(flag, default=default, type=int, help=help_text)

    def add_switch(flag: str, help_text: str) -> None:
        # Boolean switches are False unless the flag is present.
        cli.add_argument(flag, help=help_text, action="store_true")

    # Options are registered in the order they should appear in --help.
    cli.add_argument(
        "--mode",
        default="safe",
        choices=("safe", "live"),
        help="safe: draft only; live: send replies. Default: safe",
    )
    add_int(
        "--max-scan-cards",
        120,
        "Max cards discovered from 评论和@ page. Default: 120",
    )
    add_int(
        "--max-scroll-rounds",
        20,
        "Max scroll rounds for discovery. Default: 20",
    )
    add_int(
        "--max-items",
        20,
        "Max items to process from discovered cards. Default: 20",
    )
    add_int(
        "--early-stop-non-actionable",
        20,
        "Stop when this many consecutive non-actionable items are seen. Default: 20",
    )
    cli.add_argument(
        "--fixed-reply",
        help="Optional fixed reply text for all actionable items.",
    )
    add_switch(
        "--no-like",
        "Disable conditional like action for satisfied comments.",
    )
    add_int(
        "--max-expand-rounds",
        8,
        "Max rounds for clicking 展开更多回复 when checking already-replied state. Default: 8",
    )
    add_int(
        "--max-scan-replies",
        200,
        "Max nested replies scanned in one parent thread. Default: 200",
    )
    add_int(
        "--max-expand-seconds",
        20,
        "Max seconds for online reply-state expansion checks. Default: 20",
    )
    cli.add_argument(
        "--report",
        default="",
        help="Absolute path for run report JSON. Default: output/xhs-reply/reply-report-<ts>.json",
    )
    add_switch(
        "--no-headed",
        "Initialize Playwright session in headless mode when session is missing.",
    )
    return cli.parse_args()
# Summary counters as (summary field, item status counted under that field).
# Kept in one place so every report carries the same set of keys.
_SUMMARY_STATUS_FIELDS = (
    ("replies_sent", "replied"),
    ("planned", "planned"),
    ("already_replied", "already_replied"),
    ("skipped_duplicate", "skipped_duplicate"),
    ("needs_manual_review", "needs_manual_review"),
    ("skipped_content_deleted", "skipped_content_deleted"),
    ("failed", "failed"),
    ("sent_unknown", "sent_unknown"),
)


def _build_summary(
    mode: str, scanned_cards: int, items: list[dict[str, Any]]
) -> dict[str, Any]:
    """Return the run summary: mode, card/item totals and one counter per status."""
    summary: dict[str, Any] = {
        "mode": mode,
        "scanned_cards": scanned_cards,
        "processed_items": len(items),
    }
    for field, status in _SUMMARY_STATUS_FIELDS:
        summary[field] = sum(1 for it in items if it.get("status") == status)
    return summary


def _write_report(
    report_path: Path, summary: dict[str, Any], items: list[dict[str, Any]]
) -> None:
    """Write ``{"summary": ..., "items": ...}`` as indented UTF-8 JSON to ``report_path``."""
    report = {"summary": summary, "items": items}
    report_path.write_text(
        json.dumps(report, ensure_ascii=False, indent=2), encoding="utf-8"
    )


def _process_card(
    pw_shared: Path,
    card: dict[str, Any],
    args: argparse.Namespace,
    self_profile_id: Any,
    run_target_keys: set[str],
    run_dedupe_keys: set[str],
) -> tuple[dict[str, Any], bool]:
    """Handle one notification card and return ``(row, actionable)``.

    ``row`` is the report entry for the card. ``actionable`` is True only when
    the card reached the like/send stage; every early exit (deleted content,
    missing target, duplicate, already replied, unknown reply state) returns
    False so the caller can count consecutive non-actionable items.

    ``run_target_keys`` and ``run_dedupe_keys`` are mutated in place: the keys
    of an actionable card are added before the send attempt.

    Raises:
        FlowError: propagated from the Playwright helper calls.
    """
    working_card = dict(card)
    open_result: Optional[dict[str, Any]] = None
    if not str(working_card.get("note_url", "")).strip():
        # The card carries no note URL, so open it and read the URL from the page.
        open_result = open_note_detail_from_card(
            pw_shared,
            card=working_card,
        )
        opened_url = str(open_result.get("url", "")).strip()
        if re.search(r"/(explore|discovery/item|note)/", opened_url):
            working_card["note_url"] = opened_url
        if str(open_result.get("reason", "")).strip() == "content_deleted":
            row_deleted: dict[str, Any] = {
                "timestamp": now_utc_iso(),
                "mode": args.mode,
                "card": card,
                "note_url": opened_url,
                "note_id": "",
                "note_title": "",
                "note_author": "",
                "event_type": str(card.get("event_type", "")),
                "notifier": str(card.get("notifier", "")),
                "notifier_comment": "",
                "parent_comment_id": "",
                "status": "skipped_content_deleted",
                "liked": "no",
                "card_open": open_result,
            }
            return row_deleted, False

    detail = collect_note_detail_context(
        pw_shared,
        card=working_card,
        self_profile_id=self_profile_id,
        reuse_current_page=bool(open_result and open_result.get("opened")),
    )
    target = detail.get("target") if isinstance(detail.get("target"), dict) else {}
    target_comment = str(target.get("content", "")).strip()
    parent_comment_id = str(target.get("parent_comment_id", "")).strip()
    note_id = str(detail.get("note_id", "")).strip()
    row: dict[str, Any] = {
        "timestamp": now_utc_iso(),
        "mode": args.mode,
        "card": card,
        "note_url": str(detail.get("note_url", working_card.get("note_url", ""))),
        "note_id": note_id,
        "note_title": str(detail.get("note_title", "")),
        "note_author": str(detail.get("note_author", "")),
        "event_type": str(card.get("event_type", "")),
        "notifier": str(card.get("notifier", "")),
        "notifier_comment": target_comment,
        "parent_comment_id": parent_comment_id,
        "status": "",
        "liked": "no",
    }
    if open_result is not None:
        row["card_open"] = open_result

    # Without a located target and both ids there is nothing to reply to.
    if not detail.get("target_found") or not note_id or not parent_comment_id:
        row["status"] = "failed"
        row["failure_reason"] = str(detail.get("reason", "missing_target_or_ids"))
        return row, False

    quality = classify_comment_quality(target_comment)
    row["comment_quality"] = quality
    reply_text, confidence, reason = draft_reply(
        target_comment=target_comment,
        note_title=str(detail.get("note_title", "")),
        fixed_reply=args.fixed_reply,
    )
    row["reply_text"] = reply_text
    row["confidence"] = confidence
    row["reply_reason"] = reason

    target_key = f"{note_id}|{parent_comment_id}"
    dedupe = f"{target_key}|{reply_hash(reply_text)}"
    row["target_key"] = target_key
    row["dedupe_key"] = dedupe
    if target_key in run_target_keys or dedupe in run_dedupe_keys:
        row["status"] = "skipped_duplicate"
        return row, False

    reply_state = check_reply_state_for_target(
        pw_shared,
        parent_comment_id=parent_comment_id,
        self_profile_id=self_profile_id,
        max_expand_rounds=args.max_expand_rounds,
        max_scan_replies=args.max_scan_replies,
        max_expand_seconds=args.max_expand_seconds,
    )
    row["reply_state"] = reply_state
    state = str(reply_state.get("state", "unknown"))
    if state == "already_replied":
        row["status"] = "already_replied"
        return row, False
    if state == "unknown":
        # Never send when the online check could not decide; leave it to a human.
        row["status"] = "needs_manual_review"
        row["failure_reason"] = str(reply_state.get("reason", "reply_state_unknown"))
        return row, False

    # Runtime dedupe only: avoid duplicate send attempts within current run.
    run_target_keys.add(target_key)
    run_dedupe_keys.add(dedupe)
    if quality == "satisfied" and not args.no_like:
        row["liked"] = like_target_comment(pw_shared, parent_comment_id)
    send_result = send_reply_for_comment(
        pw_shared,
        parent_comment_id=parent_comment_id,
        reply_text=reply_text,
        mode=args.mode,
    )
    row["status"] = str(send_result.get("status", "failed"))
    if send_result.get("reason"):
        row["failure_reason"] = str(send_result["reason"])
    row["verified"] = bool(send_result.get("verified", False))
    return row, True


def main() -> int:
    """Run the notification reply flow end to end and write a JSON report.

    Steps: validate the numeric limits, locate the shared Playwright wrapper,
    resolve the report path, check login, open the 评论和@ tab, collect cards,
    process them one by one, then write the report and print a summary line.

    Returns:
        0 on a completed run (including "no cards found"), 1 on invalid
        arguments, a missing or non-executable wrapper, or a ``FlowError``,
        and 2 when XiaoHongShu is not logged in.

    When a ``FlowError`` interrupts the run after some items were processed, a
    partial report (with an ``error`` field in the summary) is written so the
    replies already handled are not lost.
    """
    args = parse_args()
    # NOTE(review): --early-stop-non-actionable is not range-checked here; a
    # value <= 0 makes the loop stop at the first non-actionable item.
    if (
        args.max_scan_cards < 1
        or args.max_scroll_rounds < 1
        or args.max_items < 1
        or args.max_expand_rounds < 1
        or args.max_scan_replies < 1
        or args.max_expand_seconds < 1
    ):
        print(
            "max-scan-cards / max-scroll-rounds / max-items / max-expand-rounds / "
            "max-scan-replies / max-expand-seconds must be positive.",
            file=sys.stderr,
        )
        return 1

    # The default wrapper lives under the directory four levels above this script.
    repo_root = Path(__file__).resolve().parents[3]
    pw_shared = Path(
        os.environ.get("PW_SHARED_WRAPPER", str(repo_root / "tools/pw"))
    ).expanduser()
    if not pw_shared.exists() or not pw_shared.is_file():
        print(f"Shared Playwright wrapper not found: {pw_shared}", file=sys.stderr)
        return 1
    if not os.access(pw_shared, os.X_OK):
        print(f"Shared Playwright wrapper is not executable: {pw_shared}", file=sys.stderr)
        return 1

    # Exported so child processes started later in the run inherit the chosen mode.
    os.environ["PLAYWRIGHT_SHARED_INIT_MODE"] = "headless" if args.no_headed else "headed"

    report_path = (
        Path(args.report).expanduser().resolve()
        if args.report
        else (
            repo_root
            / "output/xhs-reply"
            / f"reply-report-{datetime.now().strftime('%Y%m%d-%H%M%S')}.json"
        ).resolve()
    )
    report_path.parent.mkdir(parents=True, exist_ok=True)

    # Declared outside the try block so the error handler can still report them.
    cards: list[dict[str, Any]] = []
    items: list[dict[str, Any]] = []
    try:
        ensure_pw_preflight(pw_shared)
        run_pw(pw_shared, "snapshot")
        login = is_logged_in(pw_shared)
        if not bool(login.get("logged_in")):
            print(
                "XiaoHongShu is not logged in. Please log in at https://www.xiaohongshu.com and rerun.",
                file=sys.stderr,
            )
            return 2
        self_profile_id = parse_self_profile_id(str(login.get("profile_href", "")))

        scope = open_comment_notifications(pw_shared)
        if bool(scope.get("requires_login")):
            print(
                "XiaoHongShu is not logged in. Please log in at https://www.xiaohongshu.com and rerun.",
                file=sys.stderr,
            )
            return 2
        if not bool(scope.get("ok")):
            raise FlowError("Failed to switch to 评论和@ tab.")

        cards = collect_notification_cards(
            pw_shared,
            max_scan_cards=args.max_scan_cards,
            max_scroll_rounds=args.max_scroll_rounds,
        )
        if not cards:
            # Same schema as a normal run (all counters zero) plus a message.
            summary = _build_summary(args.mode, 0, [])
            summary["message"] = "No actionable cards found."
            _write_report(report_path, summary, [])
            print(f"No actionable cards found. Report: {report_path}")
            return 0

        run_target_keys: set[str] = set()
        run_dedupe_keys: set[str] = set()
        consecutive_non_actionable = 0
        for card in cards:
            if len(items) >= args.max_items:
                break
            row, actionable = _process_card(
                pw_shared,
                card,
                args,
                self_profile_id,
                run_target_keys,
                run_dedupe_keys,
            )
            items.append(row)
            if actionable:
                consecutive_non_actionable = 0
                continue
            consecutive_non_actionable += 1
            if consecutive_non_actionable >= args.early_stop_non_actionable:
                break

        summary = _build_summary(args.mode, len(cards), items)
        _write_report(report_path, summary, items)
        print(f"Report: {report_path}")
        print(
            "Summary: "
            + ", ".join(
                [
                    f"scanned={summary['scanned_cards']}",
                    f"processed={summary['processed_items']}",
                    f"replied={summary['replies_sent']}",
                    f"planned={summary['planned']}",
                    f"skipped_duplicate={summary['skipped_duplicate']}",
                    f"needs_manual_review={summary['needs_manual_review']}",
                    f"skipped_content_deleted={summary['skipped_content_deleted']}",
                    f"already_replied={summary['already_replied']}",
                    f"failed={summary['failed']}",
                    f"sent_unknown={summary['sent_unknown']}",
                ]
            )
        )
        return 0
    except FlowError as exc:
        print(str(exc), file=sys.stderr)
        if items:
            # Keep a record of what was already handled before the failure.
            summary = _build_summary(args.mode, len(cards), items)
            summary["error"] = str(exc)
            _write_report(report_path, summary, items)
            print(f"Partial report: {report_path}", file=sys.stderr)
        return 1
# Script entry point: run the flow and use main()'s return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())