1476 lines
51 KiB
Python
Executable File
1476 lines
51 KiB
Python
Executable File
#!/usr/bin/env python3
|
||
"""Run XiaoHongShu notification reply flow via shared Playwright CLI session."""
|
||
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import hashlib
|
||
import json
|
||
import os
|
||
import re
|
||
import subprocess
|
||
import sys
|
||
from datetime import datetime, timezone
|
||
from pathlib import Path
|
||
from typing import Any, Optional
|
||
|
||
|
||
class FlowError(RuntimeError):
    """Signal that a step of the reply flow failed.

    Used for subprocess commands that exit non-zero and for command output
    that cannot be interpreted.
    """
|
||
|
||
|
||
# Regular-expression fragments, matched case-insensitively, that mark command
# output as a network reachability failure.
NETWORK_SANDBOX_PATTERNS = (
    r"\bENOTFOUND\b",
    r"\bEAI_AGAIN\b",
    r"\bETIMEDOUT\b",
    r"\bECONNREFUSED\b",
    r"registry\.npmjs\.org",
)


def is_network_sandbox_error(output: str) -> bool:
    """Return True when *output* matches any ``NETWORK_SANDBOX_PATTERNS`` entry."""
    for pattern in NETWORK_SANDBOX_PATTERNS:
        if re.search(pattern, output, flags=re.IGNORECASE) is not None:
            return True
    return False
|
||
|
||
|
||
def run_command(
    cmd: list[str], *, capture_output: bool = True, check: bool = True
) -> subprocess.CompletedProcess[str]:
    """Run *cmd* in text mode and optionally fail on a non-zero exit status.

    Args:
        cmd: Program and arguments, handed to ``subprocess.run`` as a list.
        capture_output: When true, stdout is piped and stderr is merged into
            it; when false, both streams are inherited from this process.
        check: When true, a non-zero exit status raises ``FlowError``.

    Returns:
        The completed process object.

    Raises:
        FlowError: If ``check`` is true and the command exits non-zero. The
            message carries a recovery hint when the captured output looks
            like a network failure.
    """
    stream_options: dict[str, object] = (
        {"stdout": subprocess.PIPE, "stderr": subprocess.STDOUT}
        if capture_output
        else {}
    )
    proc = subprocess.run(cmd, text=True, **stream_options)
    if not check or proc.returncode == 0:
        return proc

    # Nothing was captured when the streams were inherited.
    captured = (proc.stdout or "") if capture_output else ""
    if is_network_sandbox_error(captured):
        # Help operators quickly recover when Playwright CLI cannot reach npm
        # under sandboxed network rules.
        raise FlowError(
            "Playwright preflight failed due to network sandbox restrictions.\n"
            "Rerun the same command with escalation (sandbox_permissions=require_escalated).\n\n"
            f"Command: {' '.join(cmd)}\n{captured}"
        )
    raise FlowError(
        f"Command failed ({proc.returncode}): {' '.join(cmd)}\n{captured}"
    )
|
||
|
||
|
||
def run_pw(pw_shared: Path, *args: str) -> str:
    """Run the Playwright wrapper at *pw_shared* with *args* and return its output.

    The output is stdout with stderr merged in; an empty string is returned
    when nothing was captured. A non-zero exit raises ``FlowError`` through
    ``run_command``.
    """
    argv = [str(pw_shared)]
    argv.extend(args)
    completed = run_command(argv, capture_output=True)
    return completed.stdout if completed.stdout else ""
|
||
|
||
|
||
def ensure_pw_preflight(pw_shared: Path) -> None:
    """Verify the Playwright wrapper starts by running it with ``--help``.

    Raises ``FlowError`` (from ``run_command``) when the wrapper exits
    non-zero.
    """
    help_cmd = [str(pw_shared), "--help"]
    run_command(help_cmd, capture_output=True, check=True)
|
||
|
||
|
||
# Captures the body of the "### Result" section. The section ends at the next
# "### " heading or, when it is the last section, at the end of the output.
_RESULT_SECTION_RE = re.compile(r"(?ms)^### Result\s*\n(.*?)(?:\n### |\Z)")

# Fallback marker: a line containing this token followed by a JSON payload.
_JSON_MARKER = "__CODEX_JSON__"


def _as_result_dict(data: Any) -> dict[str, Any]:
    """Return *data* unchanged when it is a dict, else wrap it as ``{"value": data}``."""
    if isinstance(data, dict):
        return data
    return {"value": data}


def _strip_code_fence(payload: str) -> str:
    """Remove a surrounding ``` fence (first and last line) when one is present."""
    if not payload.startswith("```"):
        return payload
    lines = payload.splitlines()
    if len(lines) >= 3 and lines[0].startswith("```") and lines[-1].startswith("```"):
        return "\n".join(lines[1:-1]).strip()
    return payload


def run_code_json(pw_shared: Path, function_source: str) -> dict[str, Any]:
    """Execute *function_source* through ``run-code`` and decode its JSON result.

    Two output shapes are understood, tried in this order:

    1. A ``### Result`` section whose body (optionally wrapped in a ``` fence)
       is JSON. The section may be followed by another ``### `` heading or be
       the final section of the output.
    2. The last line containing ``__CODEX_JSON__``; everything after the
       marker on that line is parsed as JSON.

    Args:
        pw_shared: Path of the Playwright wrapper executable.
        function_source: JavaScript source passed as the ``run-code`` argument.

    Returns:
        The decoded value when it is a dict, otherwise ``{"value": decoded}``.

    Raises:
        FlowError: If the command fails, if the marker payload is not valid
            JSON, or if neither output shape is present.
    """
    out = run_pw(pw_shared, "run-code", function_source)

    section = _RESULT_SECTION_RE.search(out)
    if section:
        payload = _strip_code_fence(section.group(1).strip())
        try:
            data = json.loads(payload)
        except json.JSONDecodeError:
            # Not JSON after all; fall through to the marker-based lookup.
            pass
        else:
            return _as_result_dict(data)

    # Scan from the end so the most recent marker line wins.
    for line in reversed(out.splitlines()):
        idx = line.find(_JSON_MARKER)
        if idx < 0:
            continue
        payload = line[idx + len(_JSON_MARKER):].strip()
        try:
            data = json.loads(payload)
        except json.JSONDecodeError as exc:  # pragma: no cover - runtime guard
            raise FlowError(
                f"Failed to parse JSON payload from Playwright output:\n{out}"
            ) from exc
        return _as_result_dict(data)

    raise FlowError(
        "Playwright output did not contain expected JSON marker.\n" + out[-5000:]
    )
|
||
|
||
|
||
def now_utc_iso() -> str:
    """Return the current time in UTC as a timezone-aware ISO 8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
|
||
|
||
|
||
# Full-width / CJK punctuation folded to its ASCII counterpart. The code points
# are written as escapes so the table survives tools that fold full-width
# forms to ASCII (which would silently turn each entry into a no-op).
_FULLWIDTH_PUNCTUATION = str.maketrans(
    {
        "\uff0c": ",",  # full-width comma
        "\u3002": ".",  # ideographic full stop
        "\uff01": "!",  # full-width exclamation mark
        "\uff1f": "?",  # full-width question mark
        "\uff1a": ":",  # full-width colon
    }
)

# Leading "回复 <name>:" prefix. Only the ASCII colon is needed because the
# full-width colon has already been folded when this pattern is applied.
_REPLY_PREFIX_RE = re.compile(r"^回复\s*[^:]{1,30}\s*:\s*")


def normalize_text(text: str) -> str:
    """Return *text* in the canonical form used for comparison and hashing.

    Steps, in order: trim, collapse each whitespace run to one space, fold
    full-width punctuation to ASCII, drop a leading ``回复 <name>:`` prefix
    (name of 1-30 non-colon characters), and trim again.
    """
    value = re.sub(r"\s+", " ", text.strip())
    value = value.translate(_FULLWIDTH_PUNCTUATION)
    value = _REPLY_PREFIX_RE.sub("", value)
    return value.strip()
|
||
|
||
|
||
def reply_hash(reply_text: str) -> str:
    """Return the hex SHA-256 digest of the normalized, UTF-8 encoded reply text."""
    canonical = normalize_text(reply_text)
    digest = hashlib.sha256()
    digest.update(canonical.encode("utf-8"))
    return digest.hexdigest()
|
||
|
||
|
||
def parse_self_profile_id(profile_href: str) -> str:
    """Extract the id segment that follows ``/user/profile/`` in *profile_href*.

    The id stops at the next ``/``, ``?`` or ``#``. An empty string is
    returned when the href has no such segment.
    """
    found = re.search(r"/user/profile/([^/?#]+)", profile_href)
    return found.group(1) if found else ""
|
||
|
||
|
||
def classify_comment_quality(comment_text: str) -> str:
    """Label a comment ``"satisfied"`` or ``"not_satisfied"``.

    After normalization a comment is ``"not_satisfied"`` when it is empty,
    consists solely of non-word characters and underscores, contains one of
    the listed solicitation keywords, or contains a URL marker
    (``http://``, ``https://``, ``www.``). Keyword and URL checks ignore case.
    """
    text = normalize_text(comment_text)
    rejected = (
        not text
        or re.fullmatch(r"[\W_]+", text) is not None
        or re.search(r"(加v|vx|私聊我|互赞|引流|广告)", text, flags=re.IGNORECASE) is not None
        or re.search(r"(http://|https://|www\.)", text, flags=re.IGNORECASE) is not None
    )
    return "not_satisfied" if rejected else "satisfied"
|
||
|
||
|
||
def draft_reply(
|
||
*,
|
||
target_comment: str,
|
||
note_title: str,
|
||
fixed_reply: str | None,
|
||
) -> tuple[str, str, str]:
|
||
if fixed_reply:
|
||
return fixed_reply.strip(), "high", "fixed_reply"
|
||
|
||
text = normalize_text(target_comment)
|
||
lower = text.lower()
|
||
has_question = "?" in text or "?" in target_comment or any(
|
||
key in text for key in ("怎么", "如何", "可以", "能不能", "是否", "多少", "多久", "啥")
|
||
)
|
||
|
||
if any(k in lower for k in ("business", "gpt", "chatgpt")) and any(
|
||
k in text for k in ("额度", "配额", "套餐", "方案", "订阅")
|
||
):
|
||
return "Business额度按当前套餐走,升级套餐额度会同步提高。", "high", "known_business_quota"
|
||
|
||
if has_question:
|
||
if len(text) <= 8:
|
||
return "这个细节怕说不准,方便私信我,我详细说下。", "low", "question_too_short"
|
||
if any(k in text for k in ("bug", "报错", "错误", "失败", "不能", "无效")):
|
||
return "先看报错信息和步骤,一般定位后就能快速解决。", "medium", "diagnostic_hint"
|
||
if note_title:
|
||
return "这个要看具体场景,按你这条情况通常可以这样处理。", "medium", "cautious_direct"
|
||
return "这个细节怕说不准,方便私信我,我详细说下。", "low", "missing_context"
|
||
|
||
if any(k in text for k in ("谢谢", "有用", "喜欢", "学到了", "收藏")):
|
||
return "谢谢你支持,我会继续更新更实用的内容。", "high", "positive_feedback"
|
||
|
||
return "感谢留言,这条我后续会补充得更清楚。", "medium", "default"
|
||
|
||
|
||
def is_logged_in(pw_shared: Path) -> dict[str, Any]:
    """Open the notification page and report whether the session looks logged in.

    The page script returns ``logged_in``, ``profile_href`` (href of the
    profile link found around a node whose text is exactly "我", or ""),
    ``is_login_page``, ``has_mentions_tab`` and the final ``url``.
    """
    probe_source = r"""
async (page) => {
  await page.goto('https://www.xiaohongshu.com/notification', { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(1000);
  const data = await page.evaluate(() => {
    const normalize = (v) => (v || '').replace(/\s+/g, ' ').trim();
    const text = document.body?.innerText || '';
    const isLoginPage =
      /\/login/.test(location.pathname) ||
      /手机号登录|获取验证码|扫码|登录后推荐更懂你的笔记|马上登录/.test(text);

    const woNodes = Array.from(document.querySelectorAll('a,button,span,div'))
      .filter((el) => normalize(el.textContent) === '我')
      .slice(0, 20);
    const woProfileCandidates = [];
    for (const node of woNodes) {
      const anchor = node.closest('a[href]');
      if (anchor) woProfileCandidates.push((anchor.getAttribute('href') || '').trim());
    }
    const woProfileHref = woProfileCandidates.find((href) => /\/user\/profile\//.test(href)) || '';

    const hasMentionsTab = Array.from(document.querySelectorAll('a,button,div,span')).some((el) => {
      const t = normalize(el.textContent);
      return t === '评论和@' || t === '评论与@';
    });

    return {
      logged_in: Boolean(!isLoginPage && (hasMentionsTab || woProfileHref)),
      profile_href: woProfileHref,
      is_login_page: isLoginPage,
      has_mentions_tab: hasMentionsTab,
    };
  });
  return { ...data, url: page.url() };
}
"""
    return run_code_json(pw_shared, probe_source.strip())
|
||
|
||
|
||
def open_comment_notifications(pw_shared: Path) -> dict[str, Any]:
    """Open the notification page and try to select the comments/mentions tab.

    The tab is looked up by role ``tab``, then role ``button``, then plain
    text; the first successful click wins. The page script returns ``ok``
    (the tab was clicked or the URL is still a notification URL, and no login
    prompt is showing), ``url`` and ``requires_login``.
    """
    open_tab_source = r"""
async (page) => {
  await page.goto('https://www.xiaohongshu.com/notification', { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(1000);

  const tryClick = async (locator) => {
    try {
      if (await locator.count()) {
        await locator.first().click({ timeout: 2000 });
        return true;
      }
    } catch (_) {}
    return false;
  };

  let clicked = false;
  clicked = clicked || await tryClick(page.getByRole('tab', { name: /评论和@|评论与@/i }));
  clicked = clicked || await tryClick(page.getByRole('button', { name: /评论和@|评论与@/i }));
  clicked = clicked || await tryClick(page.getByText(/评论和@|评论与@/i));
  await page.waitForTimeout(800);

  const pageState = await page.evaluate(() => {
    const text = document.body?.innerText || '';
    const requiresLogin =
      /\/login/.test(location.pathname) ||
      /手机号登录|获取验证码|扫码|登录后推荐更懂你的笔记|马上登录/.test(text);
    return { requires_login: requiresLogin };
  });

  return {
    ok: Boolean((clicked || /\/notification/.test(page.url())) && !pageState.requires_login),
    url: page.url(),
    requires_login: pageState.requires_login,
  };
}
"""
    return run_code_json(pw_shared, open_tab_source.strip())
|
||
|
||
|
||
def collect_notification_cards(
    pw_shared: Path, *, max_scan_cards: int, max_scroll_rounds: int
) -> list[dict[str, Any]]:
    """Scrape comment/reply/mention notification cards from the current page.

    The page script scans visible ``div``/``li``/``article``/``section``
    elements whose text mentions one of the known notification events,
    de-duplicates them, and scrolls between rounds. It stops at
    ``max_scan_cards`` cards, after ``max_scroll_rounds`` rounds, or after two
    consecutive rounds that add nothing.

    Returns:
        The dict entries of the script's ``cards`` list (keys ``note_url``,
        ``notifier``, ``event_type``, ``notifier_snippet``, ``card_text``,
        ``timestamp_text``), or an empty list when ``cards`` is not a list.
    """
    scan_limits = {
        "maxScanCards": max_scan_cards,
        "maxScrollRounds": max_scroll_rounds,
    }
    template = r"""
async (page) => {
  const options = __OPTIONS__;
  const data = await page.evaluate(async (options) => {
    const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
    const normalize = (v) => (v || '').replace(/\s+/g, ' ').trim();
    const eventRe = /评论了你的笔记|回复了你的评论|在评论中@了你|你的好友评论了你的笔记|你的好友回复了你的评论|你的好友在评论中@了你/;

    const eventTypeFromText = (text) => {
      if (/回复了你的评论|你的好友回复了你的评论/.test(text)) return '回复了你的评论';
      if (/在评论中@了你|你的好友在评论中@了你|@了你/.test(text)) return '在评论中@了你';
      if (/评论了你的笔记|你的好友评论了你的笔记/.test(text)) return '评论了你的笔记';
      return '';
    };

    const timestampFromText = (text) => {
      const m = text.match(
        /(刚刚|\d+分钟前|\d+小时前|昨天(?:\s*\d{1,2}:\d{2})?|前天(?:\s*\d{1,2}:\d{2})?|\d{1,2}[-\/]\d{1,2}(?:\s*\d{1,2}:\d{2})?)/
      );
      return m ? normalize(m[1]) : '';
    };

    const toAbsUrl = (href) => {
      if (!href) return '';
      try {
        return new URL(href, location.origin).toString();
      } catch {
        return '';
      }
    };

    const getScrollContainer = () => {
      const candidates = Array.from(document.querySelectorAll('*')).filter((el) => {
        const style = window.getComputedStyle(el);
        const scrollable = /(auto|scroll)/.test(style.overflowY || '');
        return scrollable && el.scrollHeight > el.clientHeight + 40;
      });
      candidates.sort((a, b) => b.clientHeight - a.clientHeight);
      return candidates[0] || null;
    };

    const isVisible = (el) => {
      if (!el) return false;
      const rect = el.getBoundingClientRect();
      const style = window.getComputedStyle(el);
      return (
        rect.width > 20 &&
        rect.height > 14 &&
        style.display !== 'none' &&
        style.visibility !== 'hidden' &&
        style.opacity !== '0'
      );
    };

    const parseCard = (text, eventType) => {
      const cleaned = normalize(text);
      const idx = cleaned.indexOf(eventType);
      let notifier = '';
      if (idx > 0) {
        notifier = normalize(cleaned.slice(0, idx).replace(/你的好友/g, ''));
      }
      notifier = notifier.slice(-20);
      let snippet = cleaned;
      if (notifier) snippet = snippet.replace(notifier, '');
      snippet = snippet.replace(eventType, '').replace(/你的好友/g, '');
      const ts = timestampFromText(cleaned);
      if (ts) snippet = snippet.replace(ts, '');
      snippet = snippet
        .replace(/回复\s*取消评论将会清空已经输入的内容确认返回/g, '')
        .replace(/回复\s*取消评论/g, '')
        .replace(/回到顶部/g, '');
      snippet = normalize(snippet).slice(0, 120);
      return {
        notifier,
        snippet,
        timestamp: ts,
        cardText: cleaned.slice(0, 260),
      };
    };

    const cards = [];
    const seen = new Set();
    let unchangedRounds = 0;
    let lastCount = 0;

    for (let round = 0; round < options.maxScrollRounds; round += 1) {
      const roots = Array.from(document.querySelectorAll('div,li,article,section'));
      for (const root of roots) {
        if (!isVisible(root)) continue;
        const text = normalize(root.innerText || root.textContent || '');
        if (!text || text.length < 12 || text.length > 260) continue;
        if (!eventRe.test(text)) continue;
        if (/评论和@|赞和收藏|新增关注/.test(text) && text.length < 30) continue;
        if (/温馨提示|广告屏蔽|沪ICP备/.test(text)) continue;

        const eventType = eventTypeFromText(text);
        if (!eventType) continue;
        const parsed = parseCard(text, eventType);
        if (!parsed.notifier && !parsed.snippet) continue;

        const linkEl = root.querySelector('a[href*="/explore/"],a[href*="/discovery/item/"],a[href*="/note/"]');
        const noteUrl = toAbsUrl(linkEl?.getAttribute('href') || '');
        const key = [
          parsed.notifier,
          eventType,
          parsed.timestamp,
          parsed.snippet.slice(0, 60),
          noteUrl,
        ].join('|');
        if (seen.has(key)) continue;
        seen.add(key);

        cards.push({
          note_url: noteUrl,
          notifier: parsed.notifier,
          event_type: eventType,
          notifier_snippet: parsed.snippet,
          card_text: parsed.cardText,
          timestamp_text: parsed.timestamp,
        });
        if (cards.length >= options.maxScanCards) break;
      }

      if (cards.length >= options.maxScanCards) break;

      const scroller = getScrollContainer();
      if (scroller) {
        scroller.scrollTop += Math.max(320, Math.floor(scroller.clientHeight * 0.92));
      } else {
        window.scrollBy(0, Math.max(520, Math.floor(window.innerHeight * 0.92)));
      }
      await sleep(700);

      if (cards.length === lastCount) {
        unchangedRounds += 1;
      } else {
        unchangedRounds = 0;
        lastCount = cards.length;
      }
      if (unchangedRounds >= 2) break;
    }

    return {
      cards,
      scanned: cards.length,
    };
  }, options);
  return data;
}
""".strip()
    # The limits are spliced into the script as a JSON literal.
    script = template.replace(
        "__OPTIONS__", json.dumps(scan_limits, ensure_ascii=False)
    )
    payload = run_code_json(pw_shared, script)
    raw_cards = payload.get("cards")
    if not isinstance(raw_cards, list):
        return []
    return [entry for entry in raw_cards if isinstance(entry, dict)]
|
||
|
||
|
||
def open_note_detail_from_card(
    pw_shared: Path,
    *,
    card: dict[str, Any],
) -> dict[str, Any]:
    """Re-find *card* on the notification page and click through to its note.

    The page script reloads the notification page, scores visible candidate
    elements against the card's event type, notifier, timestamp and snippet
    tokens, tags the best one, and clicks its content area (falling back to a
    note link inside the card). It then polls for a detail URL, a 404 URL, or
    a visible "content deleted" message.

    Returns:
        The script's result dict: always ``opened``, plus ``reason`` on
        failure and ``url`` / ``via`` / ``match`` once a card was matched.
    """
    card_literal = json.dumps(card, ensure_ascii=False)
    template = r"""
async (page) => {
  const card = __CARD__;
  const normalize = (v) => (v || '').replace(/\s+/g, ' ').trim();
  const detailUrlRe = /\/explore\/|\/discovery\/item\/|\/note\//;

  await page.goto('https://www.xiaohongshu.com/notification', { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(1000);
  const tab = page.getByText(/评论和@|评论与@/i).first();
  if (await tab.count()) {
    await tab.click().catch(() => null);
    await page.waitForTimeout(400);
  }

  const eventType = normalize(card.event_type || '');
  const notifier = normalize(card.notifier || '');
  const snippet = normalize(card.notifier_snippet || '');
  const timestamp = normalize(card.timestamp_text || '');
  const eventRe = /评论了你的笔记|回复了你的评论|在评论中@了你|你的好友评论了你的笔记|你的好友回复了你的评论|你的好友在评论中@了你/;

  const matchInfo = await page.evaluate(({ eventType, notifier, snippet, timestamp }) => {
    const normalize = (v) => (v || '').replace(/\s+/g, ' ').trim();
    const eventRe =
      /评论了你的笔记|回复了你的评论|在评论中@了你|你的好友评论了你的笔记|你的好友回复了你的评论|你的好友在评论中@了你/;
    const visible = (el) => {
      const rect = el.getBoundingClientRect();
      const style = window.getComputedStyle(el);
      return (
        rect.width > 20 &&
        rect.height > 14 &&
        style.display !== 'none' &&
        style.visibility !== 'hidden' &&
        style.opacity !== '0'
      );
    };
    document.querySelectorAll('[data-codex-card-target="1"]').forEach((el) => {
      el.removeAttribute('data-codex-card-target');
    });
    const roots = Array.from(document.querySelectorAll('div,li,article,section'));
    let best = null;
    let bestScore = -999;
    let bestText = '';
    for (const el of roots) {
      if (!visible(el)) continue;
      const text = normalize(el.innerText || el.textContent || '');
      if (!text || text.length < 12 || text.length > 260) continue;
      if (!eventRe.test(text)) continue;

      let score = 0;
      if (eventType && text.includes(eventType)) score += 5;
      if (notifier && text.includes(notifier)) score += 4;
      if (timestamp && text.includes(timestamp)) score += 1;
      if (snippet) {
        const tokens = snippet.split(' ').filter((t) => t.length >= 2).slice(0, 4);
        for (const token of tokens) {
          if (text.includes(token)) score += 1;
        }
      }
      if (/回复/.test(text)) score += 1;

      if (score > bestScore) {
        best = el;
        bestScore = score;
        bestText = text.slice(0, 220);
      }
    }
    if (!best) return { found: false };
    best.setAttribute('data-codex-card-target', '1');
    return { found: true, score: bestScore, text: bestText };
  }, { eventType, notifier, snippet, timestamp });

  if (!matchInfo?.found) {
    return { opened: false, reason: 'card_not_matched' };
  }

  const target = page.locator('[data-codex-card-target="1"]').first();
  if (!(await target.count())) {
    return { opened: false, reason: 'target_missing_after_match' };
  }

  await target.scrollIntoViewIfNeeded().catch(() => null);
  // Always click comment content area, never the "回复" action.
  let clickTarget = target.locator('.main,.item-main,.content-main,.comment-content,[class*="comment-content"]').first();
  if (!(await clickTarget.count())) {
    clickTarget = target.locator('.content,.info,.interaction-hint,[class*="info"]').first();
  }
  if (!(await clickTarget.count())) {
    return {
      opened: false,
      reason: 'comment_click_target_not_found',
      url: page.url(),
      via: 'card_match',
      match: matchInfo,
    };
  }
  await clickTarget.click({ timeout: 3000 }).catch(() => null);
  const waitOutcome = async (timeoutMs) => {
    const start = Date.now();
    const deletedReSource = '该内容已被删除|内容已被删除|笔记已删除|当前笔记暂时无法浏览';
    while (Date.now() - start < timeoutMs) {
      const cur = page.url();
      if (detailUrlRe.test(cur)) return { state: 'opened', url: cur };
      if (/\/404(?:[/?#]|$)/.test(cur)) return { state: 'deleted', url: cur };
      const deletedByToast = await page
        .evaluate((reSource) => {
          const re = new RegExp(reSource, 'i');
          const visible = (el) => {
            const rect = el.getBoundingClientRect();
            const style = window.getComputedStyle(el);
            return (
              rect.width > 10 &&
              rect.height > 10 &&
              style.display !== 'none' &&
              style.visibility !== 'hidden' &&
              style.opacity !== '0'
            );
          };
          const nodes = Array.from(document.querySelectorAll('div,span,p,section,article'));
          return nodes.some((el) => visible(el) && re.test((el.textContent || '').trim()));
        }, deletedReSource)
        .catch(() => false);
      if (deletedByToast) return { state: 'deleted', url: cur };
      await page.waitForTimeout(180);
    }
    const finalUrl = page.url();
    if (/\/404(?:[/?#]|$)/.test(finalUrl)) return { state: 'deleted', url: finalUrl };
    if (detailUrlRe.test(finalUrl)) return { state: 'opened', url: finalUrl };
    return { state: 'stayed', url: finalUrl };
  };

  let outcome = await waitOutcome(3600);
  let after = outcome.url;
  if (outcome.state === 'deleted') {
    return {
      opened: false,
      reason: 'content_deleted',
      url: after,
      via: 'card_match',
      match: matchInfo,
    };
  }

  if (outcome.state !== 'opened') {
    const anchor = target
      .locator('a[href*="/explore/"],a[href*="/discovery/item/"],a[href*="/note/"]')
      .first();
    if (await anchor.count()) {
      await anchor.click().catch(() => null);
      outcome = await waitOutcome(3600);
      after = outcome.url;
      if (outcome.state === 'deleted') {
        return {
          opened: false,
          reason: 'content_deleted',
          url: after,
          via: 'card_match',
          match: matchInfo,
        };
      }
    }
  }

  return {
    opened: outcome.state === 'opened' || detailUrlRe.test(after),
    url: after,
    via: 'card_match',
    match: matchInfo,
  };
}
""".strip()
    # The card is spliced into the script as a JSON literal.
    script = template.replace("__CARD__", card_literal)
    outcome = run_code_json(pw_shared, script)
    if isinstance(outcome, dict):
        return outcome
    return {"opened": False, "reason": "invalid_open_result"}
|
||
|
||
|
||
def collect_note_detail_context(
    pw_shared: Path,
    *,
    card: dict[str, Any],
    self_profile_id: str,
    reuse_current_page: bool = False,
) -> dict[str, Any]:
    """Gather note metadata and pick the comment that best matches *card*.

    The page script navigates to ``card["note_url"]`` unless
    ``reuse_current_page`` is true and the current URL already looks like a
    note detail page. It then reads the title, description, author and
    hashtags, lists every element whose id starts with ``comment-``, and
    scores each comment against the card's notifier and snippet tokens.

    Args:
        pw_shared: Path of the Playwright wrapper executable.
        card: Notification card; ``note_url``, ``notifier``,
            ``notifier_snippet`` and ``event_type`` are read by the script.
        self_profile_id: Profile id used to flag comments that contain a link
            to the caller's own profile (``replied_by_me``).
        reuse_current_page: Skip navigation when already on a detail page.

    Returns:
        The script's result dict. When the card has no ``note_url`` it is
        ``{"target_found": False, "reason": "missing_note_url"}``.

    Raises:
        FlowError: If the decoded result is not a dict.
    """
    script_input = {
        "card": card,
        "selfProfileId": self_profile_id,
        "reuseCurrentPage": reuse_current_page,
    }
    template = r"""
async (page) => {
  const input = __PAYLOAD__;
  const card = input.card || {};
  const selfProfileId = input.selfProfileId || '';
  const reuseCurrentPage = Boolean(input.reuseCurrentPage);
  const detailUrlRe = /\/explore\/|\/discovery\/item\/|\/note\//;

  if (!card.note_url) {
    return { target_found: false, reason: 'missing_note_url' };
  }

  if (!reuseCurrentPage) {
    await page.goto(card.note_url, { waitUntil: 'domcontentloaded' });
    await page.waitForTimeout(1200);
  } else {
    const cur = page.url();
    if (!detailUrlRe.test(cur)) {
      await page.goto(card.note_url, { waitUntil: 'domcontentloaded' });
      await page.waitForTimeout(1200);
    }
  }

  const pageUrl = page.url();
  const data = await page.evaluate(
    ({ card, selfProfileId, pageUrl }) => {
      const normalize = (v) => (v || '').replace(/\s+/g, ' ').trim();
      const noteIdMatch = (pageUrl.match(/\/explore\/([^/?#]+)/) || [])[1] || '';

      const titleEl =
        document.querySelector('h1') ||
        document.querySelector('[class*="title"]') ||
        document.querySelector('meta[property="og:title"]');
      const noteTitle = normalize(titleEl?.textContent || titleEl?.getAttribute?.('content') || '');

      const noteDescEl =
        document.querySelector('[class*="desc"]') ||
        document.querySelector('[class*="content"]') ||
        document.querySelector('meta[name="description"]');
      const noteDesc = normalize(noteDescEl?.textContent || noteDescEl?.getAttribute?.('content') || '');

      const noteAuthorEl =
        document.querySelector('[class*="author"]') ||
        document.querySelector('a[href*="/user/profile/"] span') ||
        document.querySelector('a[href*="/user/profile/"]');
      const noteAuthor = normalize(noteAuthorEl?.textContent || '');

      const hashtags = Array.from(
        new Set(((noteTitle + ' ' + noteDesc).match(/#[^\s#]+/g) || []).slice(0, 12))
      );

      const commentNodes = Array.from(document.querySelectorAll('[id^="comment-"], .comment-item'));
      const comments = [];
      for (const node of commentNodes) {
        const domId = node?.id && String(node.id).startsWith('comment-') ? String(node.id) : '';
        if (!domId) continue;
        const parentCommentId = domId.replace(/^comment-/, '');
        if (!parentCommentId) continue;

        const authorEl =
          node.querySelector('.name') ||
          node.querySelector('.author') ||
          node.querySelector('[class*="author"]') ||
          node.querySelector('a[href*="/user/profile/"]');
        const author = normalize(authorEl?.textContent || '');

        const contentEl =
          node.querySelector('.content') ||
          node.querySelector('.desc') ||
          node.querySelector('[class*="content"]') ||
          node.querySelector('[class*="desc"]');
        const content = normalize(contentEl?.textContent || '');

        const timeEl = node.querySelector('.time') || node.querySelector('[class*="time"]');
        const timestampText = normalize(timeEl?.textContent || '');

        const profileLinks = Array.from(node.querySelectorAll('a[href*="/user/profile/"]'));
        const repliedByMe = Boolean(
          selfProfileId &&
            profileLinks.some((a) => (a.getAttribute('href') || '').includes(`/user/profile/${selfProfileId}`))
        );

        comments.push({
          dom_id: domId,
          parent_comment_id: parentCommentId,
          author,
          content,
          timestamp_text: timestampText,
          replied_by_me: repliedByMe,
        });
      }

      const cardSnippet = normalize(card?.notifier_snippet || '');
      const snippetTokens = cardSnippet.split(' ').filter((t) => t.length >= 2).slice(0, 6);
      let best = null;
      let bestScore = -999;

      for (const c of comments) {
        let score = 0;
        if (card?.notifier && c.author && c.author.includes(card.notifier)) score += 3;
        for (const token of snippetTokens) {
          if (token && c.content && c.content.includes(token)) score += 1;
        }
        if (card?.event_type && String(card.event_type).includes('@') && c.content.includes('@')) score += 1;
        if (!c.content) score -= 1;
        if (score > bestScore) {
          bestScore = score;
          best = c;
        }
      }

      if (!best && comments.length > 0) best = comments[0];

      return {
        note_url: pageUrl,
        note_id: noteIdMatch,
        note_title: noteTitle,
        note_desc: noteDesc,
        note_author: noteAuthor,
        hashtags,
        card,
        comment_count: comments.length,
        target_found: Boolean(best),
        target: best,
        best_score: bestScore,
      };
    },
    { card, selfProfileId, pageUrl }
  );
  return data;
}
""".strip()
    # The input object is spliced into the script as a JSON literal.
    script = template.replace(
        "__PAYLOAD__", json.dumps(script_input, ensure_ascii=False)
    )
    context = run_code_json(pw_shared, script)
    if isinstance(context, dict):
        return context
    raise FlowError("Invalid detail context response from Playwright.")
|
||
|
||
|
||
def check_reply_state_for_target(
    pw_shared: Path,
    *,
    parent_comment_id: str,
    self_profile_id: str,
    max_expand_rounds: int,
    max_scan_replies: int,
    max_expand_seconds: int,
) -> dict[str, Any]:
    """Decide whether the caller's profile already replied under a comment.

    The page script looks inside ``#comment-<parent_comment_id>`` for nested
    comment nodes that link to ``/user/profile/<self_profile_id>``. While no
    such reply is visible it clicks the first "expand / more replies" control
    and rescans, bounded by ``max_expand_rounds`` and ``max_expand_seconds``.

    Args:
        pw_shared: Path of the Playwright wrapper executable.
        parent_comment_id: Id suffix of the target comment element.
        self_profile_id: Profile id to look for among nested replies.
        max_expand_rounds: Upper bound on scan/expand iterations.
        max_scan_replies: Upper bound on nested nodes inspected per scan.
        max_expand_seconds: Wall-clock budget for the loop, in seconds.

    Returns:
        The script's result dict with ``state`` forced to one of
        ``"already_replied"``, ``"not_replied"`` or ``"unknown"``.
    """
    # Values are rendered as JavaScript literals before being spliced in.
    parent_literal = json.dumps(parent_comment_id, ensure_ascii=False)
    profile_literal = json.dumps(self_profile_id, ensure_ascii=False)
    rounds_literal = int(max_expand_rounds)
    replies_literal = int(max_scan_replies)
    seconds_literal = int(max_expand_seconds)

    script = f"""
async (page) => {{
  const parentCommentId = {parent_literal};
  const selfProfileId = {profile_literal};
  const maxExpandRounds = {rounds_literal};
  const maxScanReplies = {replies_literal};
  const maxExpandMs = {seconds_literal} * 1000;

  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  const rootId = `comment-${{parentCommentId}}`;

  const scanState = async () => {{
    return await page.evaluate(
      ({{ rootId, selfProfileId, maxScanReplies }}) => {{
        const normalize = (v) => (v || '').replace(/\\s+/g, ' ').trim();
        const isVisible = (el) => {{
          if (!el) return false;
          const rect = el.getBoundingClientRect();
          const style = window.getComputedStyle(el);
          return (
            rect.width > 4 &&
            rect.height > 4 &&
            style.display !== 'none' &&
            style.visibility !== 'hidden' &&
            style.opacity !== '0'
          );
        }};
        const hasMyProfileAnchor = (el) => {{
          if (!el || !selfProfileId) return false;
          const links = Array.from(el.querySelectorAll('a[href*="/user/profile/"]'));
          return links.some((a) =>
            (a.getAttribute('href') || '').includes(`/user/profile/${{selfProfileId}}`)
          );
        }};

        const root = document.querySelector(`#${{rootId}}`);
        if (!root) {{
          return {{
            root_found: false,
            my_reply_found: false,
            expandable_found: false,
            reply_nodes_scanned: 0,
          }};
        }}

        const nestedCommentNodes = Array.from(root.querySelectorAll('[id^="comment-"]'))
          .filter((el) => (el.id || '') !== rootId)
          .filter((el) => isVisible(el))
          .slice(0, maxScanReplies);

        let myReplyFound = false;
        for (const node of nestedCommentNodes) {{
          if (hasMyProfileAnchor(node)) {{
            myReplyFound = true;
            break;
          }}
        }}

        const expandControls = Array.from(root.querySelectorAll('button,span,div,a'))
          .filter((el) => isVisible(el))
          .filter((el) => {{
            const t = normalize(el.textContent || '');
            if (!t || t.length > 30) return false;
            if (/收起|已全部|没有更多|暂无更多/.test(t)) return false;
            return /展开|更多|查看.*回复|全部回复|共\\d+条回复|显示更多|加载更多/.test(t);
          }});

        return {{
          root_found: true,
          my_reply_found: myReplyFound,
          expandable_found: expandControls.length > 0,
          reply_nodes_scanned: nestedCommentNodes.length,
        }};
      }},
      {{ rootId, selfProfileId, maxScanReplies }}
    );
  }};

  const clickExpand = async () => {{
    return await page.evaluate(({{ rootId }}) => {{
      const normalize = (v) => (v || '').replace(/\\s+/g, ' ').trim();
      const isVisible = (el) => {{
        if (!el) return false;
        const rect = el.getBoundingClientRect();
        const style = window.getComputedStyle(el);
        return (
          rect.width > 4 &&
          rect.height > 4 &&
          style.display !== 'none' &&
          style.visibility !== 'hidden' &&
          style.opacity !== '0'
        );
      }};
      const root = document.querySelector(`#${{rootId}}`);
      if (!root) return false;
      const controls = Array.from(root.querySelectorAll('button,span,div,a'))
        .filter((el) => isVisible(el))
        .filter((el) => {{
          const t = normalize(el.textContent || '');
          if (!t || t.length > 30) return false;
          if (/收起|已全部|没有更多|暂无更多/.test(t)) return false;
          return /展开|更多|查看.*回复|全部回复|共\\d+条回复|显示更多|加载更多/.test(t);
        }});
      if (!controls.length) return false;
      controls[0].click();
      return true;
    }}, {{ rootId }});
  }};

  const startedAt = Date.now();
  let rounds = 0;
  let expandClicks = 0;
  let maxSeenReplyNodes = 0;
  let sawExpandable = false;

  while (rounds < maxExpandRounds && Date.now() - startedAt <= maxExpandMs) {{
    rounds += 1;
    const state = await scanState();
    maxSeenReplyNodes = Math.max(maxSeenReplyNodes, state.reply_nodes_scanned || 0);
    if (!state.root_found) {{
      return {{
        state: 'unknown',
        reason: 'target_not_found',
        rounds,
        expand_clicks: expandClicks,
        reply_nodes_scanned: maxSeenReplyNodes,
      }};
    }}
    if (state.my_reply_found) {{
      return {{
        state: 'already_replied',
        reason: 'my_reply_visible',
        rounds,
        expand_clicks: expandClicks,
        reply_nodes_scanned: maxSeenReplyNodes,
      }};
    }}

    if (!state.expandable_found) {{
      return {{
        state: 'not_replied',
        reason: maxSeenReplyNodes > 0 ? 'visible_replies_no_mine' : 'no_replies_visible',
        rounds,
        expand_clicks: expandClicks,
        reply_nodes_scanned: maxSeenReplyNodes,
      }};
    }}

    sawExpandable = true;
    const clicked = await clickExpand();
    if (!clicked) {{
      return {{
        state: 'unknown',
        reason: 'expand_click_failed',
        rounds,
        expand_clicks: expandClicks,
        reply_nodes_scanned: maxSeenReplyNodes,
      }};
    }}
    expandClicks += 1;
    await sleep(300);
  }}

  if (sawExpandable) {{
    return {{
      state: 'unknown',
      reason: 'expand_limit_reached',
      rounds,
      expand_clicks: expandClicks,
      reply_nodes_scanned: maxSeenReplyNodes,
    }};
  }}

  return {{
    state: 'not_replied',
    reason: 'default_not_replied',
    rounds,
    expand_clicks: expandClicks,
    reply_nodes_scanned: maxSeenReplyNodes,
  }};
}}
""".strip()

    outcome = run_code_json(pw_shared, script)
    if not isinstance(outcome, dict):
        return {"state": "unknown", "reason": "invalid_reply_state_result"}

    # Anything outside the three known states (including "") becomes "unknown".
    reported = str(outcome.get("state", "unknown")).strip()
    known_states = {"already_replied", "not_replied", "unknown"}
    outcome["state"] = reported if reported in known_states else "unknown"
    return outcome
|
||
|
||
|
||
def like_target_comment(pw_shared: Path, parent_comment_id: str) -> str:
    """Click the like button of one comment on the currently open page.

    The browser-side snippet locates ``#comment-<parent_comment_id>``, finds a
    like button inside it (by accessible name first, then by visible text),
    and skips the click when the button's text, CSS class, or ``aria-pressed``
    attribute indicates the comment is already liked.

    Args:
        pw_shared: Path of the shared Playwright wrapper passed to
            ``run_code_json``.
        parent_comment_id: Comment id used to build the ``#comment-<id>``
            selector.

    Returns:
        ``"liked"``, ``"already_liked"`` or ``"like_failed"``. Any payload
        that is not a dict is reported as ``"like_failed"``.
    """
    result = run_code_json(
        pw_shared,
        f"""
async (page) => {{
  const parentCommentId = {json.dumps(parent_comment_id, ensure_ascii=False)};
  const domId = `#comment-${{parentCommentId}}`;
  const root = page.locator(domId).first();
  if (!(await root.count())) return {{ status: 'like_failed', reason: 'target_not_found' }};

  let likeBtn = root.getByRole('button', {{ name: /赞|like/i }}).first();
  if (!(await likeBtn.count())) {{
    likeBtn = root.locator('button,[role="button"]').filter({{ hasText: /赞|like/i }}).first();
  }}
  if (!(await likeBtn.count())) return {{ status: 'like_failed', reason: 'like_button_not_found' }};

  const text = ((await likeBtn.textContent()) || '').replace(/\\s+/g, ' ').trim();
  const cls = (await likeBtn.getAttribute('class')) || '';
  const pressed = (await likeBtn.getAttribute('aria-pressed')) || '';
  const alreadyLiked =
    /已赞|取消赞|liked/i.test(text) ||
    /active|liked|is-liked/i.test(cls) ||
    pressed === 'true';
  if (alreadyLiked) return {{ status: 'already_liked' }};

  try {{
    await likeBtn.click({{ timeout: 2000 }});
    await page.waitForTimeout(500);
    return {{ status: 'liked' }};
  }} catch (_) {{
    return {{ status: 'like_failed', reason: 'click_failed' }};
  }}
}}
""".strip(),
    )
    # The other helpers that call run_code_json validate the payload type
    # before using it; without this guard a non-dict payload would raise
    # AttributeError, which the FlowError handler in main() does not catch.
    if not isinstance(result, dict):
        return "like_failed"
    return str(result.get("status", "like_failed"))
|
||
|
||
|
||
def send_reply_for_comment(
    pw_shared: Path, *, parent_comment_id: str, reply_text: str, mode: str
) -> dict[str, Any]:
    """Draft (and in live mode send) a reply under one comment.

    The browser-side snippet:

    1. Locates ``#comment-<parent_comment_id>`` and clicks its reply button.
    2. Picks the last visible editor matching a list of known selectors.
    3. Writes ``reply_text`` into it (``fill`` for textarea/input elements,
       keyboard typing with an ``insertText`` fallback otherwise).
    4. Confirms the editor actually holds text. Every write call above
       swallows its own error, so without this check a silent failure would
       be reported as ``planned`` or would lead to clicking send on an empty
       editor.
    5. Returns ``planned`` when ``mode`` is not ``"live"`` (the text stays in
       the editor and nothing is sent); otherwise clicks the send button and
       looks for a success toast.

    Args:
        pw_shared: Path of the shared Playwright wrapper passed to
            ``run_code_json``.
        parent_comment_id: Comment id used to build the ``#comment-<id>``
            selector.
        reply_text: Text to write into the reply editor.
        mode: ``"live"`` sends the reply; any other value only drafts it.

    Returns:
        A dict with ``ok`` and ``status`` (``"planned"``, ``"replied"``,
        ``"sent_unknown"`` or ``"failed"``), a ``reason`` on failure and
        ``verified`` after a send attempt. A payload that is not a dict is
        reported as a failure with reason ``invalid_send_result``.
    """
    # NOTE(review): the select-all step presses both Meta+A and Control+A so
    # it works regardless of platform; on macOS the second chord may undo the
    # selection made by the first — confirm against the target environment.
    result = run_code_json(
        pw_shared,
        f"""
async (page) => {{
  const parentCommentId = {json.dumps(parent_comment_id, ensure_ascii=False)};
  const replyText = {json.dumps(reply_text, ensure_ascii=False)};
  const mode = {json.dumps(mode, ensure_ascii=False)};

  const root = page.locator(`#comment-${{parentCommentId}}`).first();
  if (!(await root.count())) return {{ ok: false, status: 'failed', reason: 'target_not_found' }};

  let replyButton = root.getByRole('button', {{ name: /回复/i }}).first();
  if (!(await replyButton.count())) {{
    replyButton = root.locator('button,[role="button"],span,div').filter({{ hasText: /回复/ }}).first();
  }}
  if (!(await replyButton.count())) {{
    return {{ ok: false, status: 'failed', reason: 'reply_button_not_found' }};
  }}

  try {{
    await replyButton.click({{ timeout: 2000 }});
  }} catch (_) {{
    return {{ ok: false, status: 'failed', reason: 'reply_click_failed' }};
  }}

  await page.waitForTimeout(500);

  const pickVisibleEditor = async () => {{
    const selectors = [
      'textarea',
      '.engage-bar [contenteditable="true"]',
      '.engage-bar .content-input',
      '[contenteditable="true"]',
      'div[role="textbox"]',
      '.content-input',
      '.input-box',
    ];
    for (const sel of selectors) {{
      const loc = page.locator(sel);
      const count = await loc.count();
      for (let i = count - 1; i >= 0; i -= 1) {{
        const node = loc.nth(i);
        const visible = await node.isVisible().catch(() => false);
        if (visible) return node;
      }}
    }}
    return null;
  }};

  const editor = await pickVisibleEditor();
  if (!editor) {{
    return {{ ok: false, status: 'failed', reason: 'editor_not_found' }};
  }}

  const tagName = await editor.evaluate((el) => (el.tagName || '').toLowerCase()).catch(() => '');
  const isFormField = tagName === 'textarea' || tagName === 'input';
  const readEditorText = async () => {{
    if (isFormField) {{
      return String((await editor.inputValue().catch(() => '')) || '').trim();
    }}
    return String((await editor.evaluate((el) => el.textContent || '').catch(() => '')) || '').trim();
  }};

  if (isFormField) {{
    await editor.click().catch(() => null);
    await editor.fill(replyText).catch(() => null);
  }} else {{
    await editor.click().catch(() => null);
    await page.keyboard.press('Meta+A').catch(() => null);
    await page.keyboard.press('Control+A').catch(() => null);
    await page.keyboard.type(replyText, {{ delay: 15 }}).catch(() => null);
    const currentText = await readEditorText();
    if (!currentText) {{
      await page.keyboard.insertText(replyText).catch(() => null);
    }}
  }}

  if (replyText.trim() && !(await readEditorText())) {{
    return {{ ok: false, status: 'failed', reason: 'editor_fill_failed' }};
  }}

  if (mode !== 'live') {{
    return {{ ok: true, status: 'planned' }};
  }}

  const sendBtn = page.getByRole('button', {{ name: /发送/i }}).last();
  if (!(await sendBtn.count())) {{
    return {{ ok: false, status: 'failed', reason: 'send_button_not_found' }};
  }}
  try {{
    await sendBtn.click();
  }} catch (_) {{
    return {{ ok: false, status: 'failed', reason: 'send_click_failed' }};
  }}
  await page.waitForTimeout(1000);

  const successToast = page.getByText(/评论成功|回复成功|发送成功/i).first();
  const verified = await successToast.isVisible().catch(() => false);
  return {{
    ok: true,
    status: verified ? 'replied' : 'sent_unknown',
    verified,
  }};
}}
""".strip(),
    )
    if not isinstance(result, dict):
        return {"ok": False, "status": "failed", "reason": "invalid_send_result"}
    return result
|
||
|
||
|
||
def parse_args(argv: Optional[list[str]] = None) -> argparse.Namespace:
    """Parse the command-line options of the reply flow.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, so existing ``parse_args()`` callers are
            unaffected; passing a list allows programmatic use.

    Returns:
        The populated ``argparse.Namespace``. Numeric limits are only
        converted to ``int`` here; range checks are left to the caller.
    """
    parser = argparse.ArgumentParser(
        description="Reply XiaoHongShu 通知-评论和@ items with online reply-state checks."
    )
    parser.add_argument(
        "--mode",
        choices=("safe", "live"),
        default="safe",
        help="safe: draft only; live: send replies. Default: safe",
    )
    parser.add_argument(
        "--max-scan-cards",
        type=int,
        default=120,
        help="Max cards discovered from 评论和@ page. Default: 120",
    )
    parser.add_argument(
        "--max-scroll-rounds",
        type=int,
        default=20,
        help="Max scroll rounds for discovery. Default: 20",
    )
    parser.add_argument(
        "--max-items",
        type=int,
        default=20,
        help="Max items to process from discovered cards. Default: 20",
    )
    parser.add_argument(
        "--early-stop-non-actionable",
        type=int,
        default=20,
        help="Stop when this many consecutive non-actionable items are seen. Default: 20",
    )
    parser.add_argument(
        "--fixed-reply",
        help="Optional fixed reply text for all actionable items.",
    )
    parser.add_argument(
        "--no-like",
        action="store_true",
        help="Disable conditional like action for satisfied comments.",
    )
    parser.add_argument(
        "--max-expand-rounds",
        type=int,
        default=8,
        help="Max rounds for clicking 展开更多回复 when checking already-replied state. Default: 8",
    )
    parser.add_argument(
        "--max-scan-replies",
        type=int,
        default=200,
        help="Max nested replies scanned in one parent thread. Default: 200",
    )
    parser.add_argument(
        "--max-expand-seconds",
        type=int,
        default=20,
        help="Max seconds for online reply-state expansion checks. Default: 20",
    )
    parser.add_argument(
        "--report",
        default="",
        help="Absolute path for run report JSON. Default: output/xhs-reply/reply-report-<ts>.json",
    )
    parser.add_argument(
        "--no-headed",
        action="store_true",
        help="Initialize Playwright session in headless mode when session is missing.",
    )
    return parser.parse_args(argv)
|
||
|
||
|
||
def _build_summary(
    mode: str, scanned_cards: int, items: list[dict[str, Any]]
) -> dict[str, Any]:
    """Aggregate per-item statuses into the report's summary dict."""

    def count(status: str) -> int:
        return sum(1 for item in items if item.get("status") == status)

    return {
        "mode": mode,
        "scanned_cards": scanned_cards,
        "processed_items": len(items),
        "replies_sent": count("replied"),
        "planned": count("planned"),
        "already_replied": count("already_replied"),
        "skipped_duplicate": count("skipped_duplicate"),
        "needs_manual_review": count("needs_manual_review"),
        "skipped_content_deleted": count("skipped_content_deleted"),
        "failed": count("failed"),
        "sent_unknown": count("sent_unknown"),
    }


def _write_report(
    report_path: Path, summary: dict[str, Any], items: list[dict[str, Any]]
) -> None:
    """Serialize the summary and item rows to ``report_path`` as UTF-8 JSON."""
    report = {"summary": summary, "items": items}
    report_path.write_text(
        json.dumps(report, ensure_ascii=False, indent=2), encoding="utf-8"
    )


def _process_card(
    pw_shared: Path,
    card: dict[str, Any],
    *,
    args: argparse.Namespace,
    self_profile_id: Any,
    run_target_keys: set[str],
    run_dedupe_keys: set[str],
) -> tuple[dict[str, Any], bool]:
    """Handle one notification card and return ``(row, actionable)``.

    ``actionable`` is True only when a like/reply attempt was made; every
    skip or failure path returns False. ``run_target_keys`` and
    ``run_dedupe_keys`` are updated in place for actionable cards so later
    cards in the same run can be recognized as duplicates.
    """
    working_card = dict(card)
    open_result: Optional[dict[str, Any]] = None

    # Cards without a note URL have to be opened to discover it.
    if not str(working_card.get("note_url", "")).strip():
        open_result = open_note_detail_from_card(pw_shared, card=working_card)
        opened_url = str(open_result.get("url", "")).strip()
        if re.search(r"/(explore|discovery/item|note)/", opened_url):
            working_card["note_url"] = opened_url

        if str(open_result.get("reason", "")).strip() == "content_deleted":
            deleted_row: dict[str, Any] = {
                "timestamp": now_utc_iso(),
                "mode": args.mode,
                "card": card,
                "note_url": opened_url,
                "note_id": "",
                "note_title": "",
                "note_author": "",
                "event_type": str(card.get("event_type", "")),
                "notifier": str(card.get("notifier", "")),
                "notifier_comment": "",
                "parent_comment_id": "",
                "status": "skipped_content_deleted",
                "liked": "no",
                "card_open": open_result,
            }
            return deleted_row, False

    detail = collect_note_detail_context(
        pw_shared,
        card=working_card,
        self_profile_id=self_profile_id,
        reuse_current_page=bool(open_result and open_result.get("opened")),
    )
    target = detail.get("target") if isinstance(detail.get("target"), dict) else {}
    target_comment = str(target.get("content", "")).strip()
    parent_comment_id = str(target.get("parent_comment_id", "")).strip()
    note_id = str(detail.get("note_id", "")).strip()

    row: dict[str, Any] = {
        "timestamp": now_utc_iso(),
        "mode": args.mode,
        "card": card,
        "note_url": str(detail.get("note_url", working_card.get("note_url", ""))),
        "note_id": note_id,
        "note_title": str(detail.get("note_title", "")),
        "note_author": str(detail.get("note_author", "")),
        "event_type": str(card.get("event_type", "")),
        "notifier": str(card.get("notifier", "")),
        "notifier_comment": target_comment,
        "parent_comment_id": parent_comment_id,
        "status": "",
        "liked": "no",
    }
    if open_result is not None:
        row["card_open"] = open_result

    if not detail.get("target_found") or not note_id or not parent_comment_id:
        row["status"] = "failed"
        row["failure_reason"] = str(detail.get("reason", "missing_target_or_ids"))
        return row, False

    quality = classify_comment_quality(target_comment)
    row["comment_quality"] = quality

    reply_text, confidence, reason = draft_reply(
        target_comment=target_comment,
        note_title=str(detail.get("note_title", "")),
        fixed_reply=args.fixed_reply,
    )
    row["reply_text"] = reply_text
    row["confidence"] = confidence
    row["reply_reason"] = reason

    target_key = f"{note_id}|{parent_comment_id}"
    dedupe = f"{target_key}|{reply_hash(reply_text)}"
    row["target_key"] = target_key
    row["dedupe_key"] = dedupe

    if target_key in run_target_keys or dedupe in run_dedupe_keys:
        row["status"] = "skipped_duplicate"
        return row, False

    reply_state = check_reply_state_for_target(
        pw_shared,
        parent_comment_id=parent_comment_id,
        self_profile_id=self_profile_id,
        max_expand_rounds=args.max_expand_rounds,
        max_scan_replies=args.max_scan_replies,
        max_expand_seconds=args.max_expand_seconds,
    )
    row["reply_state"] = reply_state
    state = str(reply_state.get("state", "unknown"))
    if state == "already_replied":
        row["status"] = "already_replied"
        return row, False
    if state == "unknown":
        # An undetermined reply state is never acted on automatically.
        row["status"] = "needs_manual_review"
        row["failure_reason"] = str(reply_state.get("reason", "reply_state_unknown"))
        return row, False

    # Runtime dedupe only: avoid duplicate send attempts within current run.
    run_target_keys.add(target_key)
    run_dedupe_keys.add(dedupe)

    # NOTE(review): this like click is not gated on args.mode, so it also
    # happens in "safe" mode — confirm that is intended.
    if quality == "satisfied" and not args.no_like:
        row["liked"] = like_target_comment(pw_shared, parent_comment_id)

    send_result = send_reply_for_comment(
        pw_shared,
        parent_comment_id=parent_comment_id,
        reply_text=reply_text,
        mode=args.mode,
    )
    row["status"] = str(send_result.get("status", "failed"))
    if send_result.get("reason"):
        row["failure_reason"] = str(send_result["reason"])
    row["verified"] = bool(send_result.get("verified", False))
    return row, True


def main() -> int:
    """Run the notification reply flow end to end and write a JSON report.

    Returns:
        0 on success (including "no cards found"), 1 for invalid limits, a
        missing or non-executable Playwright wrapper, or a ``FlowError``, and
        2 when XiaoHongShu is not logged in.

    When a ``FlowError`` interrupts the run after some items were already
    processed, those items are written to the report (with an ``error`` field
    in the summary) so that replies already sent are not lost.
    """
    args = parse_args()
    positive_limits = (
        args.max_scan_cards,
        args.max_scroll_rounds,
        args.max_items,
        args.max_expand_rounds,
        args.max_scan_replies,
        args.max_expand_seconds,
    )
    if any(limit < 1 for limit in positive_limits):
        print(
            "max-scan-cards / max-scroll-rounds / max-items / max-expand-rounds / "
            "max-scan-replies / max-expand-seconds must be positive.",
            file=sys.stderr,
        )
        return 1

    repo_root = Path(__file__).resolve().parents[3]
    pw_shared = Path(os.environ.get("PW_SHARED_WRAPPER", str(repo_root / "tools/pw"))).expanduser()
    if not pw_shared.exists() or not pw_shared.is_file():
        print(f"Shared Playwright wrapper not found: {pw_shared}", file=sys.stderr)
        return 1
    if not os.access(pw_shared, os.X_OK):
        print(f"Shared Playwright wrapper is not executable: {pw_shared}", file=sys.stderr)
        return 1

    os.environ["PLAYWRIGHT_SHARED_INIT_MODE"] = "headless" if args.no_headed else "headed"
    if args.report:
        report_path = Path(args.report).expanduser().resolve()
    else:
        report_path = (
            repo_root
            / "output/xhs-reply"
            / f"reply-report-{datetime.now().strftime('%Y%m%d-%H%M%S')}.json"
        ).resolve()
    report_path.parent.mkdir(parents=True, exist_ok=True)

    # Defined before the try block so the error handler can still report
    # whatever was processed before a failure.
    cards: list[Any] = []
    items: list[dict[str, Any]] = []

    try:
        ensure_pw_preflight(pw_shared)
        run_pw(pw_shared, "snapshot")
        login = is_logged_in(pw_shared)
        if not bool(login.get("logged_in")):
            print(
                "XiaoHongShu is not logged in. Please log in at https://www.xiaohongshu.com and rerun.",
                file=sys.stderr,
            )
            return 2
        self_profile_id = parse_self_profile_id(str(login.get("profile_href", "")))

        scope = open_comment_notifications(pw_shared)
        if bool(scope.get("requires_login")):
            print(
                "XiaoHongShu is not logged in. Please log in at https://www.xiaohongshu.com and rerun.",
                file=sys.stderr,
            )
            return 2
        if not bool(scope.get("ok")):
            raise FlowError("Failed to switch to 评论和@ tab.")

        cards = collect_notification_cards(
            pw_shared,
            max_scan_cards=args.max_scan_cards,
            max_scroll_rounds=args.max_scroll_rounds,
        )
        if not cards:
            # Same schema as a normal run, plus an explanatory message.
            summary = _build_summary(args.mode, 0, [])
            summary["message"] = "No actionable cards found."
            _write_report(report_path, summary, [])
            print(f"No actionable cards found. Report: {report_path}")
            return 0

        run_target_keys: set[str] = set()
        run_dedupe_keys: set[str] = set()
        consecutive_non_actionable = 0

        for card in cards:
            if len(items) >= args.max_items:
                break

            row, actionable = _process_card(
                pw_shared,
                card,
                args=args,
                self_profile_id=self_profile_id,
                run_target_keys=run_target_keys,
                run_dedupe_keys=run_dedupe_keys,
            )
            items.append(row)
            if actionable:
                consecutive_non_actionable = 0
                continue

            consecutive_non_actionable += 1
            if consecutive_non_actionable >= args.early_stop_non_actionable:
                break

        summary = _build_summary(args.mode, len(cards), items)
        _write_report(report_path, summary, items)

        print(f"Report: {report_path}")
        print(
            "Summary: "
            + ", ".join(
                [
                    f"scanned={summary['scanned_cards']}",
                    f"processed={summary['processed_items']}",
                    f"replied={summary['replies_sent']}",
                    f"planned={summary['planned']}",
                    f"skipped_duplicate={summary['skipped_duplicate']}",
                    f"needs_manual_review={summary['needs_manual_review']}",
                    f"skipped_content_deleted={summary['skipped_content_deleted']}",
                    f"already_replied={summary['already_replied']}",
                    f"failed={summary['failed']}",
                    f"sent_unknown={summary['sent_unknown']}",
                ]
            )
        )
        return 0
    except FlowError as exc:
        print(str(exc), file=sys.stderr)
        if items:
            # Keep a record of what already happened (possibly sent replies).
            partial_summary = _build_summary(args.mode, len(cards), items)
            partial_summary["error"] = str(exc)
            _write_report(report_path, partial_summary, items)
            print(f"Partial report: {report_path}", file=sys.stderr)
        return 1
|
||
|
||
|
||
# Script entry point: main()'s integer result becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|