diff --git a/.gitignore b/.gitignore index e0d3016..4f66f43 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ output/ .playwright-cli/ +**/.playwright-cli/ +.state/ __pycache__/ *.pyc diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..04d290c --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,17 @@ +# AGENTS.md + +## XiaoHongShu / Playwright Hard Rules + +These rules are mandatory for any workflow that uses `tools/pw` (directly or indirectly). + +1. Preflight before first browser action: + - Run `tools/pw --help` once with escalation (`sandbox_permissions=require_escalated`). +2. All `tools/pw ...` commands (including browse/like/comment/reply flows) must be executed with escalation (`sandbox_permissions=require_escalated`) by default. +3. If preflight or any `tools/pw ...` command still fails with network sandbox signatures, rerun the same command with escalation immediately: + - `ENOTFOUND` + - `EAI_AGAIN` + - `ETIMEDOUT` + - `ECONNREFUSED` + - `registry.npmjs.org` fetch failure +4. Do not continue XiaoHongShu actions (browse/like/comment/reply) until preflight succeeds. +5. Do not try alternate non-escalated workarounds for `tools/pw`. diff --git a/skills/gemini-image-web/SKILL.md b/skills/gemini-image-web/SKILL.md index 86f3c7d..1b3fa39 100644 --- a/skills/gemini-image-web/SKILL.md +++ b/skills/gemini-image-web/SKILL.md @@ -21,15 +21,14 @@ description: "Generate images in Gemini web via browser automation, download res - Ensure browser session can access Gemini (`https://gemini.google.com/app`). - If login, captcha, or MFA is required, pause and ask user to complete it manually. - Use the shared Playwright session policy across all skills: - - `export PLAYWRIGHT_SHARED_SESSION=codex-shared` - - Invoke Playwright CLI through `/Users/xd/java/xhs/tools/pw` (do not pass `--session` manually). + - Auto session policy: `tools/pw` derives one Playwright session per `CODEX_THREAD_ID`; `PLAYWRIGHT_SESSION_OWNER`, `PLAYWRIGHT_SHARED_SESSION`, and an explicit `--session <name>` each override it, in increasing priority.
+ - Invoke Playwright CLI through `/Users/xd/java/xhs/tools/pw`; use `--session <name>` only when explicit multi-session isolation is needed. - Decide output directory before generation, for example: - `/Users/xd/java/xhs/output/gemini-image` Quick run: ```bash -export PLAYWRIGHT_SHARED_SESSION=codex-shared python3 scripts/run_image_flow.py \ --prompt "生成一张电影感赛博朋克街景海报,夜晚霓虹,雨天反光,纵向构图。" \ --target /Users/xd/java/xhs/output/gemini-image \ @@ -155,6 +154,6 @@ Return: ## Scripts -- `/Users/xd/java/xhs/tools/pw`: Shared Playwright CLI entrypoint with fixed session + lock. +- `/Users/xd/java/xhs/tools/pw`: Shared Playwright CLI entrypoint with per-session lock, auto per-thread session resolution, and shared Chrome CDP defaults. - `scripts/run_image_flow.py`: End-to-end runner (login gate, enter image tool, generate, download image, collect files). - `scripts/collect_downloads.py`: Collect recent downloaded images with fallback sources, dedupe, and manifest. diff --git a/skills/gemini-image-web/agents/openai.yaml b/skills/gemini-image-web/agents/openai.yaml index 7f48033..b8d8b19 100644 --- a/skills/gemini-image-web/agents/openai.yaml +++ b/skills/gemini-image-web/agents/openai.yaml @@ -1,4 +1,4 @@ interface: display_name: "Gemini Image Web" short_description: "Generate Gemini images via web, multi-request, dedupe, and manifest." - default_prompt: "Use $gemini-image-web with PLAYWRIGHT_SHARED_SESSION=codex-shared; run scripts/run_image_flow.py via /Users/xd/java/xhs/tools/pw-backed CLI flow to verify login, generate images, prefer full-size download, and collect deduped outputs with manifest." + default_prompt: "Use $gemini-image-web with auto per-thread Playwright session via /Users/xd/java/xhs/tools/pw; run scripts/run_image_flow.py via /Users/xd/java/xhs/tools/pw-backed CLI flow to verify login, generate images, prefer full-size download, and collect deduped outputs with manifest."
diff --git a/skills/gemini-music-web/SKILL.md b/skills/gemini-music-web/SKILL.md index 0917218..9515814 100644 --- a/skills/gemini-music-web/SKILL.md +++ b/skills/gemini-music-web/SKILL.md @@ -21,15 +21,14 @@ description: "Generate music in Gemini web via browser automation, download resu - Ensure browser session can access Gemini (`https://gemini.google.com/app`). - If login, captcha, or MFA is required, pause and ask user to complete it manually. - Use the shared Playwright session policy across all skills: - - `export PLAYWRIGHT_SHARED_SESSION=codex-shared` - - Invoke Playwright CLI through `/Users/xd/java/xhs/tools/pw` (do not pass `--session` manually). + - Auto session policy: `tools/pw` derives one Playwright session per `CODEX_THREAD_ID`; `PLAYWRIGHT_SESSION_OWNER`, `PLAYWRIGHT_SHARED_SESSION`, and an explicit `--session <name>` each override it, in increasing priority. + - Invoke Playwright CLI through `/Users/xd/java/xhs/tools/pw`; use `--session <name>` only when explicit multi-session isolation is needed. - Decide output directory before generation, for example: - `/Users/xd/java/xhs/output/gemini-music` Quick run: ```bash -export PLAYWRIGHT_SHARED_SESSION=codex-shared python3 scripts/run_music_flow.py \ --prompt "创作一段 90 BPM 的 lo-fi hiphop,温暖、夜晚、钢琴和刷镲,时长 30 秒。" \ --target /Users/xd/java/xhs/output/gemini-music \ @@ -156,6 +155,6 @@ Return: ## Scripts -- `/Users/xd/java/xhs/tools/pw`: Shared Playwright CLI entrypoint with fixed session + lock. +- `/Users/xd/java/xhs/tools/pw`: Shared Playwright CLI entrypoint with per-session lock, auto per-thread session resolution, and shared Chrome CDP defaults. - `scripts/run_music_flow.py`: End-to-end runner (login gate, enter music tool, generate, download MP3, collect files). - `scripts/collect_downloads.py`: Collect recent downloaded audio files with fallback sources, dedupe, and manifest.
diff --git a/skills/gemini-music-web/agents/openai.yaml b/skills/gemini-music-web/agents/openai.yaml index 3c642ee..6f07632 100644 --- a/skills/gemini-music-web/agents/openai.yaml +++ b/skills/gemini-music-web/agents/openai.yaml @@ -1,4 +1,4 @@ interface: display_name: "Gemini Music Web" short_description: "Generate Gemini music via web with login gate and manifest." - default_prompt: "Use $gemini-music-web with PLAYWRIGHT_SHARED_SESSION=codex-shared; run scripts/run_music_flow.py via /Users/xd/java/xhs/tools/pw-backed CLI flow to verify login, generate music, prefer MP3 download, and collect deduped outputs with manifest." + default_prompt: "Use $gemini-music-web with auto per-thread Playwright session via /Users/xd/java/xhs/tools/pw; run scripts/run_music_flow.py via /Users/xd/java/xhs/tools/pw-backed CLI flow to verify login, generate music, prefer MP3 download, and collect deduped outputs with manifest." diff --git a/skills/gemini-video-web/SKILL.md b/skills/gemini-video-web/SKILL.md index ae81494..ed865ca 100644 --- a/skills/gemini-video-web/SKILL.md +++ b/skills/gemini-video-web/SKILL.md @@ -21,15 +21,14 @@ description: "Generate videos in Gemini web via browser automation, download res - Ensure browser session can access Gemini (`https://gemini.google.com/app`). - If login, captcha, or MFA is required, pause and ask user to complete it manually. - Use the shared Playwright session policy across all skills: - - `export PLAYWRIGHT_SHARED_SESSION=codex-shared` - - Invoke Playwright CLI through `/Users/xd/java/xhs/tools/pw` (do not pass `--session` manually). + - Auto session policy: `tools/pw` derives one Playwright session per `CODEX_THREAD_ID`; `PLAYWRIGHT_SESSION_OWNER`, `PLAYWRIGHT_SHARED_SESSION`, and an explicit `--session <name>` each override it, in increasing priority. + - Invoke Playwright CLI through `/Users/xd/java/xhs/tools/pw`; use `--session <name>` only when explicit multi-session isolation is needed.
- Decide output directory before generation, for example: - `/Users/xd/java/xhs/output/gemini-video` Quick run: ```bash -export PLAYWRIGHT_SHARED_SESSION=codex-shared python3 scripts/run_video_flow.py \ --prompt "生成一段 8 秒的科幻城市夜景镜头,雨夜霓虹,电影感运镜。" \ --target /Users/xd/java/xhs/output/gemini-video \ @@ -156,6 +155,6 @@ Return: ## Scripts -- `/Users/xd/java/xhs/tools/pw`: Shared Playwright CLI entrypoint with fixed session + lock. +- `/Users/xd/java/xhs/tools/pw`: Shared Playwright CLI entrypoint with per-session lock, auto per-thread session resolution, and shared Chrome CDP defaults. - `scripts/run_video_flow.py`: End-to-end runner (login gate, enter video tool, generate, download video, collect files). - `scripts/collect_downloads.py`: Collect recent downloaded video files with fallback sources, dedupe, and manifest. diff --git a/skills/gemini-video-web/agents/openai.yaml b/skills/gemini-video-web/agents/openai.yaml index b962e6f..ac8911c 100644 --- a/skills/gemini-video-web/agents/openai.yaml +++ b/skills/gemini-video-web/agents/openai.yaml @@ -1,4 +1,4 @@ interface: display_name: "Gemini Video Web" short_description: "Generate Gemini videos via web with login gate and manifest." - default_prompt: "Use $gemini-video-web with PLAYWRIGHT_SHARED_SESSION=codex-shared; run scripts/run_video_flow.py via /Users/xd/java/xhs/tools/pw-backed CLI flow to verify login, generate videos, prefer MP4/high-quality download, and collect deduped outputs with manifest." + default_prompt: "Use $gemini-video-web with auto per-thread Playwright session via /Users/xd/java/xhs/tools/pw; run scripts/run_video_flow.py via /Users/xd/java/xhs/tools/pw-backed CLI flow to verify login, generate videos, prefer MP4/high-quality download, and collect deduped outputs with manifest." 
diff --git a/skills/xiaohongshu-engage/SKILL.md b/skills/xiaohongshu-engage/SKILL.md index c93b357..28f56c9 100644 --- a/skills/xiaohongshu-engage/SKILL.md +++ b/skills/xiaohongshu-engage/SKILL.md @@ -15,10 +15,18 @@ description: "Browse XiaoHongShu (小红书) with Playwright and execute account ## Shared Session Policy -- Reuse a single Playwright CLI session across all web-automation skills: - - `export PLAYWRIGHT_SHARED_SESSION=codex-shared` +- Use per-thread Playwright CLI sessions across all web-automation skills: + - Auto session policy: `tools/pw` derives one Playwright session per `CODEX_THREAD_ID`; `PLAYWRIGHT_SESSION_OWNER`, `PLAYWRIGHT_SHARED_SESSION`, and an explicit `--session <name>` each override it, in increasing priority. - Use `/Users/xd/java/xhs/tools/pw` as the only Playwright CLI entrypoint. -- Do not pass `--session` directly in skill-specific commands. +- Prefer the default auto session; pass `--session <name>` only when explicit isolation is required. + +## Network Preflight Gate (Required) + +- Before first browser step, run: + - `tools/pw --help` +- If it fails with network sandbox signatures (`ENOTFOUND`, `EAI_AGAIN`, `ETIMEDOUT`, `ECONNREFUSED`, or `registry.npmjs.org` fetch failure): + - rerun the same command with escalation (`sandbox_permissions=require_escalated`). +- Do not continue browsing/点赞/收藏/评论 until this preflight succeeds. ## 1) Confirm Login State diff --git a/skills/xiaohongshu-engage/agents/openai.yaml b/skills/xiaohongshu-engage/agents/openai.yaml index 2171548..c77205f 100644 --- a/skills/xiaohongshu-engage/agents/openai.yaml +++ b/skills/xiaohongshu-engage/agents/openai.yaml @@ -1,7 +1,7 @@ interface: display_name: "XHS Engage" short_description: "Browse feed and engage via likes, favorites, and comments." - default_prompt: "Use $xiaohongshu-engage with PLAYWRIGHT_SHARED_SESSION=codex-shared and execute browser steps only through /Users/xd/java/xhs/tools/pw while browsing XiaoHongShu and interacting with relevant posts through likes, favorites, and concise comments."
+ default_prompt: "Use $xiaohongshu-engage with auto per-thread Playwright session via /Users/xd/java/xhs/tools/pw and execute browser steps only through /Users/xd/java/xhs/tools/pw while browsing XiaoHongShu and interacting with relevant posts through likes, favorites, and concise comments." policy: allow_implicit_invocation: true diff --git a/skills/xiaohongshu-publish-note/SKILL.md b/skills/xiaohongshu-publish-note/SKILL.md index 9d024db..f44aa58 100644 --- a/skills/xiaohongshu-publish-note/SKILL.md +++ b/skills/xiaohongshu-publish-note/SKILL.md @@ -18,10 +18,10 @@ description: "Execute XiaoHongShu (小红书) image-note publishing workflow in ## Shared Session Policy -- Reuse a single Playwright CLI session across all web-automation skills: - - `export PLAYWRIGHT_SHARED_SESSION=codex-shared` +- Use per-thread Playwright CLI sessions across all web-automation skills: + - Auto session policy: `tools/pw` derives one Playwright session per `CODEX_THREAD_ID`; `PLAYWRIGHT_SESSION_OWNER`, `PLAYWRIGHT_SHARED_SESSION`, and an explicit `--session <name>` each override it, in increasing priority. - Use `/Users/xd/java/xhs/tools/pw` as the only Playwright CLI entrypoint. -- Do not pass `--session` directly in skill-specific commands. +- Prefer the default auto session; pass `--session <name>` only when explicit isolation is required.
## 1) Enter Creator Publish Page diff --git a/skills/xiaohongshu-publish-note/agents/openai.yaml b/skills/xiaohongshu-publish-note/agents/openai.yaml index 32b37e6..7890c4e 100644 --- a/skills/xiaohongshu-publish-note/agents/openai.yaml +++ b/skills/xiaohongshu-publish-note/agents/openai.yaml @@ -1,4 +1,4 @@ interface: display_name: "XHS Publish Note" short_description: "Publish XHS image notes with manifest image linkage, hard gates, and publish modes" - default_prompt: "Use $xiaohongshu-publish-note with PLAYWRIGHT_SHARED_SESSION=codex-shared and run all browser steps via /Users/xd/java/xhs/tools/pw; publish XiaoHongShu image notes by preferring user image paths, otherwise generate via $gemini-image-web and use manifest target paths, enforce hard gates (images>=1, topics>=5, no location), and run in safe_mode by default unless live_mode is explicitly requested." + default_prompt: "Use $xiaohongshu-publish-note with auto per-thread Playwright session via /Users/xd/java/xhs/tools/pw and run all browser steps via /Users/xd/java/xhs/tools/pw; publish XiaoHongShu image notes by preferring user image paths, otherwise generate via $gemini-image-web and use manifest target paths, enforce hard gates (images>=1, topics>=5, no location), and run in safe_mode by default unless live_mode is explicitly requested." 
diff --git a/skills/xiaohongshu-publish-video/SKILL.md b/skills/xiaohongshu-publish-video/SKILL.md index c49da1d..d1b12e7 100644 --- a/skills/xiaohongshu-publish-video/SKILL.md +++ b/skills/xiaohongshu-publish-video/SKILL.md @@ -18,10 +18,10 @@ description: "Execute XiaoHongShu (小红书) video-note publishing workflow in ## Shared Session Policy -- Reuse a single Playwright CLI session across all web-automation skills: - - `export PLAYWRIGHT_SHARED_SESSION=codex-shared` +- Use per-thread Playwright CLI sessions across all web-automation skills: + - Auto session policy: `tools/pw` derives one Playwright session per `CODEX_THREAD_ID`; `PLAYWRIGHT_SESSION_OWNER`, `PLAYWRIGHT_SHARED_SESSION`, and an explicit `--session <name>` each override it, in increasing priority. - Use `/Users/xd/java/xhs/tools/pw` as the only Playwright CLI entrypoint. -- Do not pass `--session` directly in skill-specific commands. +- Prefer the default auto session; pass `--session <name>` only when explicit isolation is required. ## 1) Enter Creator Publish Page diff --git a/skills/xiaohongshu-publish-video/agents/openai.yaml b/skills/xiaohongshu-publish-video/agents/openai.yaml index 9646df4..fc02415 100644 --- a/skills/xiaohongshu-publish-video/agents/openai.yaml +++ b/skills/xiaohongshu-publish-video/agents/openai.yaml @@ -1,4 +1,4 @@ interface: display_name: "XHS Publish Video" short_description: "Publish XHS video notes with manifest linkage, hard gates, and publish modes" - default_prompt: "Use $xiaohongshu-publish-video with PLAYWRIGHT_SHARED_SESSION=codex-shared and run all browser steps via /Users/xd/java/xhs/tools/pw; publish XiaoHongShu video notes by preferring user video paths, otherwise generate via $gemini-video-web and use manifest target paths, enforce hard gates (videos>=1, topics>=5, no location, video-ready=true), and run in safe_mode by default unless live_mode is explicitly requested."
+ default_prompt: "Use $xiaohongshu-publish-video with auto per-thread Playwright session via /Users/xd/java/xhs/tools/pw and run all browser steps via /Users/xd/java/xhs/tools/pw; publish XiaoHongShu video notes by preferring user video paths, otherwise generate via $gemini-video-web and use manifest target paths, enforce hard gates (videos>=1, topics>=5, no location, video-ready=true), and run in safe_mode by default unless live_mode is explicitly requested." diff --git a/tools/pw b/tools/pw index e591c8e..fa20cdf 100755 --- a/tools/pw +++ b/tools/pw @@ -1,25 +1,34 @@ #!/usr/bin/env bash set -euo pipefail +usage() { + cat >&2 <<'EOF' +Usage: tools/pw [--session <name>] <command> [args...] + +Session resolution priority: +1) --session <name> +2) PLAYWRIGHT_SHARED_SESSION +3) PLAYWRIGHT_SESSION_OWNER +4) CODEX_THREAD_ID +5) codex-default +EOF +} + if [[ $# -lt 1 ]]; then - echo "Usage: tools/pw [args...]" >&2 + usage exit 1 fi -for arg in "$@"; do - if [[ "$arg" == "--session" || "$arg" == "--session="* ]]; then - echo "Do not pass --session directly. Use PLAYWRIGHT_SHARED_SESSION." >&2 - exit 2 - fi -done - CODEX_HOME="${CODEX_HOME:-$HOME/.codex}" PWCLI="${PWCLI:-$CODEX_HOME/skills/playwright/scripts/playwright_cli.sh}" -SESSION="${PLAYWRIGHT_SHARED_SESSION:-codex-shared}" LOCK_TIMEOUT="${PLAYWRIGHT_SHARED_LOCK_TIMEOUT:-120}" -LOCK_DIR="${PLAYWRIGHT_SHARED_LOCK_DIR:-/tmp/pw-shared-session.lock}" +LOCK_ROOT="${PLAYWRIGHT_SHARED_LOCK_DIR_BASE:-/tmp/pw-session-locks}" INIT_MODE="${PLAYWRIGHT_SHARED_INIT_MODE:-headed}" +if [[ ! -d "$LOCK_ROOT" ]]; then + mkdir -p "$LOCK_ROOT" +fi + if ! command -v npx >/dev/null 2>&1; then echo "npx is required." >&2 exit 1 @@ -29,6 +38,70 @@ if [[ ! 
-x "$PWCLI" ]]; then exit 1 fi +sanitize_token() { + local value="$1" + value="$(printf '%s' "$value" \ + | tr '[:upper:]' '[:lower:]' \ + | tr -cs 'a-z0-9._-' '-' \ + | sed -e 's/^-*//' -e 's/-*$//')" + if [[ -z "$value" ]]; then + value="default" + fi + printf '%s' "$value" +} + +derive_session() { + if [[ -n "${PLAYWRIGHT_SHARED_SESSION:-}" ]]; then + printf '%s' "$PLAYWRIGHT_SHARED_SESSION" + return + fi + local owner="${PLAYWRIGHT_SESSION_OWNER:-${CODEX_THREAD_ID:-default}}" + local owner_hash + owner_hash="$(printf '%s' "$owner" | shasum -a 256 | awk '{print substr($1,1,12)}')" + if [[ -z "$owner_hash" ]]; then + owner_hash="default" + fi + printf 'codex-%s' "$owner_hash" +} + +session_override="" +cmd=() +while [[ $# -gt 0 ]]; do + case "$1" in + --session) + if [[ $# -lt 2 ]]; then + echo "--session requires a value." >&2 + exit 1 + fi + session_override="$2" + shift 2 + ;; + --session=*) + session_override="${1#--session=}" + shift + ;; + *) + cmd+=("$1") + shift + ;; + esac +done + +if [[ ${#cmd[@]} -lt 1 ]]; then + usage + exit 1 +fi + +SESSION="${session_override:-$(derive_session)}" +SAFE_SESSION="$(sanitize_token "$SESSION")" +LOCK_DIR="${LOCK_ROOT}/${SAFE_SESSION}.lock" + +# Default to a single user-owned Chrome via CDP unless caller overrides. +: "${PLAYWRIGHT_MCP_CDP_ENDPOINT:=http://127.0.0.1:9222}" +: "${PLAYWRIGHT_MCP_ISOLATED:=false}" +export PLAYWRIGHT_MCP_CDP_ENDPOINT +export PLAYWRIGHT_MCP_ISOLATED + acquire_lock() { local start_ts now lock_pid lock_mtime start_ts="$(date +%s)" @@ -72,9 +145,39 @@ is_missing_session_error() { return 1 } +init_session() { + local init_code + set +e + if [[ "$INIT_MODE" == "headless" ]]; then + run_pw open about:blank >/dev/null 2>&1 + init_code=$? + elif [[ "$INIT_MODE" == "headed" ]]; then + run_pw open about:blank --headed >/dev/null 2>&1 + init_code=$? + if [[ $init_code -ne 0 ]]; then + run_pw open about:blank >/dev/null 2>&1 + init_code=$? 
+ fi + else + run_pw open about:blank --headed >/dev/null 2>&1 + init_code=$? + if [[ $init_code -ne 0 ]]; then + run_pw open about:blank >/dev/null 2>&1 + init_code=$? + fi + fi + set -e + if [[ $init_code -ne 0 ]]; then + cat >&2 <&1)" - code=$? - set -e - if [[ $code -eq 0 ]]; then - echo "$out" - exit 0 - fi - if is_missing_session_error "$out"; then - set +e - if [[ "$INIT_MODE" == "headless" ]]; then - run_pw open about:blank >/dev/null 2>&1 - init_code=$? - elif [[ "$INIT_MODE" == "headed" ]]; then - run_pw open about:blank --headed >/dev/null 2>&1 - init_code=$? - if [[ $init_code -ne 0 ]]; then - run_pw open about:blank >/dev/null 2>&1 - init_code=$? - fi - else - run_pw open about:blank --headed >/dev/null 2>&1 - init_code=$? - if [[ $init_code -ne 0 ]]; then - run_pw open about:blank >/dev/null 2>&1 - init_code=$? - fi - fi - set -e - if [[ $init_code -ne 0 ]]; then - echo "Failed to initialize shared Playwright session." >&2 - exit 1 - fi - run_pw "${cmd[@]}" - exit 0 - fi - echo "$out" >&2 - exit $code +set +e +out="$(run_pw "${cmd[@]}" 2>&1)" +code=$? +set -e +if [[ $code -eq 0 ]]; then + echo "$out" + exit 0 fi -run_pw "${cmd[@]}" +if is_missing_session_error "$out"; then + init_session + run_pw "${cmd[@]}" + exit 0 +fi + +echo "$out" >&2 +exit $code