feat(skill/playwright): unify shared-session CLI flow for web skills
This commit is contained in:
@@ -20,13 +20,17 @@ description: "Generate music in Gemini web via browser automation, download resu
|
||||
|
||||
- Ensure browser session can access Gemini (`https://gemini.google.com/app`).
|
||||
- If login, captcha, or MFA is required, pause and ask user to complete it manually.
|
||||
- Use the shared Playwright session policy across all skills:
|
||||
- `export PLAYWRIGHT_SHARED_SESSION=codex-shared`
|
||||
- Invoke Playwright CLI through `/Users/xd/java/xhs/tools/pw` (do not pass `--session` manually).
|
||||
- Decide output directory before generation, for example:
|
||||
- `/Users/xd/java/xhs/output/gemini-music`
|
||||
|
||||
Quick run:
|
||||
|
||||
```bash
|
||||
bash scripts/run_music_flow.sh \
|
||||
export PLAYWRIGHT_SHARED_SESSION=codex-shared
|
||||
python3 scripts/run_music_flow.py \
|
||||
--prompt "创作一段 90 BPM 的 lo-fi hiphop,温暖、夜晚、钢琴和刷镲,时长 30 秒。" \
|
||||
--target /Users/xd/java/xhs/output/gemini-music \
|
||||
--count 1
|
||||
@@ -152,5 +156,6 @@ Return:
|
||||
|
||||
## Scripts
|
||||
|
||||
- `scripts/run_music_flow.sh`: End-to-end runner (login gate, enter music tool, generate, download MP3, collect files).
|
||||
- `/Users/xd/java/xhs/tools/pw`: Shared Playwright CLI entrypoint with fixed session + lock.
|
||||
- `scripts/run_music_flow.py`: End-to-end runner (login gate, enter music tool, generate, download MP3, collect files).
|
||||
- `scripts/collect_downloads.py`: Collect recent downloaded audio files with fallback sources, dedupe, and manifest.
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
interface:
|
||||
display_name: "Gemini Music Web"
|
||||
short_description: "Generate Gemini music via web with login gate and manifest."
|
||||
default_prompt: "Use $gemini-music-web to run scripts/run_music_flow.sh: verify Gemini login, enter 创作音乐, generate tracks one-by-one, prefer MP3 download, then collect files with dedupe and manifest metadata."
|
||||
default_prompt: "Use $gemini-music-web with PLAYWRIGHT_SHARED_SESSION=codex-shared; run scripts/run_music_flow.py via /Users/xd/java/xhs/tools/pw-backed CLI flow to verify login, generate music, prefer MP3 download, and collect deduped outputs with manifest."
|
||||
|
||||
@@ -151,6 +151,8 @@ def resolve_sources(raw_sources: list[str] | None) -> list[Path]:
|
||||
if raw_sources:
|
||||
return [Path(item).expanduser().resolve() for item in raw_sources]
|
||||
auto_sources = discover_playwright_sources()
|
||||
auto_sources.append((Path.cwd() / ".playwright-cli").resolve())
|
||||
auto_sources.append((Path(__file__).resolve().parents[3] / ".playwright-cli").resolve())
|
||||
auto_sources.append((Path.home() / "Downloads").resolve())
|
||||
result: list[Path] = []
|
||||
seen: set[Path] = set()
|
||||
|
||||
+232
@@ -0,0 +1,232 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Run Gemini music generation flow end-to-end via Playwright CLI."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class FlowError(RuntimeError):
    """Error signalling that an external command run by this flow failed."""
|
||||
|
||||
|
||||
def run_command(
    cmd: list[str], *, capture_output: bool = True, check: bool = True
) -> subprocess.CompletedProcess[str]:
    """Run ``cmd`` as a subprocess in text mode and return the finished process.

    Args:
        cmd: Program and arguments; executed directly, without a shell.
        capture_output: When true, stdout is captured and stderr is merged
            into it. When false, both streams are inherited from this process.
        check: When true, a non-zero exit status raises ``FlowError``.

    Returns:
        The ``subprocess.CompletedProcess`` describing the finished command.

    Raises:
        FlowError: If the command cannot be started at all, or if ``check`` is
            true and the command exits with a non-zero status.
    """
    # Local import: only needed to render the command in failure messages.
    import shlex

    kwargs: dict[str, object] = {"text": True}
    if capture_output:
        kwargs["stdout"] = subprocess.PIPE
        kwargs["stderr"] = subprocess.STDOUT

    try:
        proc = subprocess.run(cmd, **kwargs)
    except OSError as exc:
        # A missing or non-runnable program would otherwise surface as a raw
        # traceback; report it through the same error type as other failures.
        raise FlowError(
            f"Command could not be started: {shlex.join(cmd)}\n{exc}"
        ) from exc

    if check and proc.returncode != 0:
        output = proc.stdout if capture_output else ""
        # shlex.join quotes each argument, so multi-line or space-containing
        # arguments stay distinguishable in the message.
        raise FlowError(
            f"Command failed ({proc.returncode}): {shlex.join(cmd)}\n{output}"
        )
    return proc
|
||||
|
||||
|
||||
def run_pw(pw_shared: Path, *args: str) -> str:
    """Invoke the shared Playwright wrapper and return what it printed.

    Args:
        pw_shared: Path of the wrapper executable.
        *args: Arguments passed to the wrapper verbatim.

    Returns:
        The captured output (stderr merged into stdout), or an empty string
        when nothing was captured.
    """
    completed = run_command([str(pw_shared), *args], capture_output=True)
    if completed.stdout:
        return completed.stdout
    return ""
|
||||
|
||||
|
||||
def is_login_required(pw_shared: Path) -> bool:
    """Report whether the current page looks like it still needs a login.

    A page-side probe is evaluated through the wrapper's ``eval`` command. The
    probe yields true when no Google account button is present and the page
    has either a ``ServiceLogin`` link or a link/button whose text matches
    "登录" or "Sign in".

    Args:
        pw_shared: Path of the shared Playwright wrapper.

    Returns:
        True when the wrapper output contains a line that is exactly ``true``.
    """
    probe_parts = [
        "() => {",
        "const hasAccount = !!document.querySelector(",
        "'button[aria-label*=\\\"Google 账号\\\"], ",
        "button[aria-label*=\\\"Google Account\\\"]'",
        ");",
        "const hasService = !!document.querySelector('a[href*=\\\"ServiceLogin\\\"]');",
        "const hasLoginCtl = Array.from(document.querySelectorAll('a,button'))",
        ".some(el => /登录|Sign in/i.test((el.textContent || '').trim()));",
        "return !hasAccount && (hasService || hasLoginCtl);",
        "}",
    ]
    probe = "".join(probe_parts)
    output = run_pw(pw_shared, "eval", probe)
    # The result is looked for on a line of its own within the wrapper output.
    verdict = re.search(r"(?m)^true$", output)
    return verdict is not None
|
||||
|
||||
|
||||
def enter_music_tool(pw_shared: Path) -> None:
    """Switch the current Gemini page into its music-creation tool.

    A page-side script is sent through the wrapper's ``run-code`` command. It
    first tries to click a button named like one of the music labels. If no
    such click succeeds, it opens the tools menu and clicks the first matching
    ``menuitemcheckbox`` or ``menuitem``. The menu is attempted twice before
    the script throws ``Music tool entry not found``.

    Args:
        pw_shared: Path of the shared Playwright wrapper.

    Raises:
        FlowError: If the wrapper command exits with a non-zero status.
    """
    js = r"""
async (page) => {
  const labels = [/创作音乐/, /制作音乐/, /Create music/i, /Music/i];
  // The labels above are bilingual, so the tools button is matched by its
  // whole name in both languages too; a Chinese-only match would make the
  // menu fallback unusable on an English UI.
  // NOTE(review): assumes the English UI names this button "Tools" - confirm.
  const toolsName = /^(?:工具|Tools)$/;

  const tryCardButtons = async () => {
    for (const re of labels) {
      const btn = page.getByRole('button', { name: re }).first();
      if (await btn.count()) {
        try {
          await btn.click({ timeout: 2000 });
          return true;
        } catch (_) {
          // Overlay may intercept pointer. Fall through to menu strategy.
        }
      }
    }
    return false;
  };

  const tryToolMenu = async () => {
    await page.getByRole('button', { name: toolsName }).click();
    for (const re of labels) {
      const itemCheck = page.getByRole('menuitemcheckbox', { name: re }).first();
      if (await itemCheck.count()) {
        await itemCheck.click();
        return true;
      }
      const itemPlain = page.getByRole('menuitem', { name: re }).first();
      if (await itemPlain.count()) {
        await itemPlain.click();
        return true;
      }
    }
    return false;
  };

  let ok = await tryCardButtons();
  if (!ok) ok = await tryToolMenu();
  if (!ok) ok = await tryToolMenu();
  if (!ok) throw new Error('Music tool entry not found');
}
"""
    run_pw(pw_shared, "run-code", js)
|
||||
|
||||
|
||||
def submit_and_download_one(pw_shared: Path, prompt: str) -> None:
    """Submit one prompt on the page and trigger a download of the result.

    The page-side script fills the prompt textbox and presses Enter, then
    waits for the stop button to appear (best effort) and to disappear. It
    opens the last download button and clicks the MP3 / audio-only menu item,
    or the first menu item when that one is absent, before pausing briefly.

    Args:
        pw_shared: Path of the shared Playwright wrapper.
        prompt: Prompt text; embedded in the script as a JSON string literal.
    """
    script_head = "\nasync (page) => {\n  const prompt = "
    script_tail = r""";
  const input = page.getByRole('textbox', { name: /为 Gemini 输入提示|Enter a prompt/i }).first();
  await input.click();
  await input.fill(prompt);
  await input.press('Enter');

  const stopBtn = page.getByRole('button', { name: /停止回答|Stop response/i }).first();
  await stopBtn.waitFor({ state: 'visible', timeout: 15000 }).catch(() => {});
  await stopBtn.waitFor({ state: 'hidden', timeout: 240000 });

  const downloadBtn = page.getByRole('button', { name: /下载音乐作品|Download music/i }).last();
  await downloadBtn.click();

  const mp3Item = page.getByRole('menuitem', { name: /纯音频|MP3/i }).first();
  if (await mp3Item.count()) {
    await mp3Item.click();
  } else {
    const anyItem = page.getByRole('menuitem').first();
    if (await anyItem.count()) await anyItem.click();
  }

  await page.waitForTimeout(1200);
}
"""
    # JSON encoding yields a valid JS string literal for arbitrary prompt text.
    script = script_head + json.dumps(prompt) + script_tail
    run_pw(pw_shared, "run-code", script)
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse this script's command-line options.

    Returns:
        A namespace with ``prompt`` and ``target`` (both required), ``count``
        (int, default 1) and ``no_headed`` (bool, default False).
    """
    cli = argparse.ArgumentParser(
        description="Generate music on Gemini web and collect downloaded files."
    )
    option_specs = (
        ("--prompt", {"required": True, "help": "Prompt text for music generation."}),
        (
            "--target",
            {"required": True, "help": "Absolute output directory for collected files."},
        ),
        (
            "--count",
            {"type": int, "default": 1, "help": "Number of tracks to generate. Default: 1."},
        ),
        (
            "--no-headed",
            {"action": "store_true", "help": "Run browser without headed mode."},
        ),
    )
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
|
||||
|
||||
|
||||
def main() -> int:
    """Run the whole flow and return the process exit status.

    Steps: validate the options and required files, open Gemini through the
    shared wrapper, stop if a login is needed, enter the music tool, submit
    the prompt once per requested track, then hand over to the collector
    script.

    Returns:
        1 for an invalid ``--count``, a missing or non-executable wrapper, a
        missing collector script, or a ``FlowError``. 2 when Gemini is not
        logged in. Otherwise the collector script's own exit status.
    """
    args = parse_args()
    if args.count < 1:
        print("--count must be a positive integer.", file=sys.stderr)
        return 1

    this_file = Path(__file__).resolve()
    fallback_wrapper = this_file.parents[3] / "tools/pw"
    # PW_SHARED_WRAPPER overrides the wrapper bundled under the repo root.
    wrapper_setting = os.environ.get("PW_SHARED_WRAPPER", str(fallback_wrapper))
    pw_shared = Path(wrapper_setting).expanduser()
    collect_script = (this_file.parent / "collect_downloads.py").resolve()

    # Checked in order; the first failing one is reported.
    preflight = (
        (pw_shared.is_file(), f"Shared Playwright wrapper not found: {pw_shared}"),
        (
            os.access(pw_shared, os.X_OK),
            f"Shared Playwright wrapper is not executable: {pw_shared}",
        ),
        (collect_script.exists(), f"Collector script not found: {collect_script}"),
    )
    for passed, complaint in preflight:
        if not passed:
            print(complaint, file=sys.stderr)
            return 1

    target = Path(args.target).expanduser().resolve()
    target.mkdir(parents=True, exist_ok=True)
    # Passed to the collector as --since.
    start_ts = time.time()

    try:
        if args.no_headed:
            init_mode = "headless"
        else:
            init_mode = "headed"
        # Set in this process's environment, which child processes inherit.
        os.environ["PLAYWRIGHT_SHARED_INIT_MODE"] = init_mode

        for pw_args in (
            ("snapshot",),
            ("goto", "https://gemini.google.com/app"),
            ("snapshot",),
        ):
            run_pw(pw_shared, *pw_args)

        if is_login_required(pw_shared):
            print(
                "Gemini is not logged in. Please log in at https://gemini.google.com/app and rerun.",
                file=sys.stderr,
            )
            return 2

        enter_music_tool(pw_shared)

        total = args.count
        for index in range(1, total + 1):
            if total > 1:
                # Ask for a distinct variation when several tracks are requested.
                track_prompt = (
                    f"{args.prompt}\n"
                    f"变体要求:这是第 {index} / {total} 首。保持风格一致,但旋律和节奏细节需要变化。"
                )
            else:
                track_prompt = args.prompt
            submit_and_download_one(pw_shared, track_prompt)

        collector_options = {
            "--target": str(target),
            "--since": str(start_ts),
            "--expected-count": str(total),
            "--limit": str(total),
            "--prefix": "gemini-music",
            "--prompt": args.prompt,
        }
        collect_cmd = [sys.executable, str(collect_script)]
        for flag, value in collector_options.items():
            collect_cmd += [flag, value]
        # The collector prints directly to the terminal and its status is ours.
        collected = run_command(collect_cmd, capture_output=False, check=False)
        return collected.returncode
    except FlowError as exc:
        print(str(exc), file=sys.stderr)
        return 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exit the interpreter with the status returned by main().
    sys.exit(main())
|
||||
@@ -1,230 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# Print the command-line synopsis and an example invocation to stdout.
usage() {
  cat <<'EOF'
Usage:
  run_music_flow.sh --prompt "<text>" --target /abs/output/dir [--count N] [--session NAME] [--no-headed]

Example:
  run_music_flow.sh \
    --prompt "创作一段 90 BPM 的 lo-fi hiphop,温暖、夜晚、钢琴和刷镲,时长 30 秒。" \
    --target /Users/xd/java/xhs/output/gemini-music \
    --count 2
EOF
}
|
||||
|
||||
# Defaults; overridden by the command-line options parsed below.
PROMPT=""
TARGET=""
COUNT=1
SESSION="gmw$(date +%s)"  # default session name derived from the current time
HEADED=1

# Parse options. A value-taking option must be followed by a value: without
# this guard `shift 2` fails when the option is last, and under `set -e` the
# script would exit without printing anything.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --prompt|--target|--count|--session)
      if [[ $# -lt 2 ]]; then
        echo "Missing value for $1" >&2
        usage
        exit 1
      fi
      case "$1" in
        --prompt) PROMPT="${2:-}" ;;
        --target) TARGET="${2:-}" ;;
        --count) COUNT="${2:-1}" ;;              # empty value falls back to 1
        --session) SESSION="${2:-$SESSION}" ;;   # empty value keeps the default
      esac
      shift 2
      ;;
    --no-headed)
      HEADED=0
      shift
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "Unknown arg: $1" >&2
      usage
      exit 1
      ;;
  esac
done
|
||||
|
||||
# Both the prompt and the output directory are mandatory.
if [[ -z "$PROMPT" || -z "$TARGET" ]]; then
  echo "Both --prompt and --target are required." >&2
  usage
  exit 1
fi

# Check for digits first, then compare in explicit base 10: a value such as
# "08" passes the digit check but is an invalid octal literal in bash
# arithmetic when used as-is.
if ! [[ "$COUNT" =~ ^[0-9]+$ ]] || (( 10#$COUNT < 1 )); then
  echo "--count must be a positive integer." >&2
  exit 1
fi
# Normalize away leading zeros so later arithmetic on COUNT is safe.
COUNT=$((10#$COUNT))
|
||||
|
||||
# Resolve tool locations; CODEX_HOME and PWCLI may be preset by the caller.
CODEX_HOME="${CODEX_HOME:-$HOME/.codex}"
PWCLI="${PWCLI:-$CODEX_HOME/skills/playwright/scripts/playwright_cli.sh}"
# The collector is expected next to this script.
COLLECT_SCRIPT="$(cd "$(dirname "$0")" && pwd)/collect_downloads.py"

# Fail early, with a specific message, when a prerequisite is missing.
command -v npx >/dev/null 2>&1 || {
  echo "npx is required." >&2
  exit 1
}
[[ -x "$PWCLI" ]] || {
  echo "Playwright wrapper not found or not executable: $PWCLI" >&2
  exit 1
}
[[ -f "$COLLECT_SCRIPT" ]] || {
  echo "Collector script not found: $COLLECT_SCRIPT" >&2
  exit 1
}
|
||||
|
||||
# Run the Playwright CLI wrapper in this run's session, forwarding all
# arguments unchanged.
pw() {
  "$PWCLI" --session "$SESSION" "$@"
}
|
||||
|
||||
# Print $1 encoded as a JSON string literal (including the surrounding quotes).
json_escape() {
  python3 -c 'import json, sys; print(json.dumps(sys.argv[1]))' "$1"
}
|
||||
|
||||
# Succeed (status 0) when the page looks logged out: no Google account button
# is present and there is a ServiceLogin link or a link/button whose text
# matches "登录" / "Sign in". The page-side probe prints true or false.
is_login_required() {
  local out
  out="$(
    pw eval "() => {
      const hasAccount = !!document.querySelector('button[aria-label*=\\\"Google 账号\\\"], button[aria-label*=\\\"Google Account\\\"]');
      const hasService = !!document.querySelector('a[href*=\\\"ServiceLogin\\\"]');
      const hasLoginCtl = Array.from(document.querySelectorAll('a,button')).some(el => /登录|Sign in/i.test((el.textContent || '').trim()));
      return !hasAccount && (hasService || hasLoginCtl);
    }"
  )"
  # grep instead of rg: ripgrep is never checked for, and if it is missing the
  # failed match would be read by the caller as "already logged in". The
  # here-string also avoids a pipeline that can break under pipefail.
  grep -q '^true$' <<<"$out"
}
|
||||
|
||||
# Switch the current Gemini page into its music-creation tool.
# The page-side script first tries a button named like one of the music
# labels; if no such click succeeds it opens the tools menu and clicks the
# first matching menu item, attempting the menu twice before throwing.
enter_music_tool() {
  local js
  js="$(cat <<'JS'
const labels = [/创作音乐/, /制作音乐/, /Create music/i, /Music/i];
// The labels above are bilingual, so the tools button is matched by its whole
// name in both languages too; a Chinese-only match would make the menu
// fallback unusable on an English UI.
// NOTE(review): assumes the English UI names this button "Tools" - confirm.
const toolsName = /^(?:工具|Tools)$/;

const tryCardButtons = async () => {
  for (const re of labels) {
    const btn = page.getByRole('button', { name: re }).first();
    if (await btn.count()) {
      try {
        await btn.click({ timeout: 2000 });
        return true;
      } catch (_) {
        // Overlay may intercept pointer. Fall through to menu strategy.
      }
    }
  }
  return false;
};

const tryToolMenu = async () => {
  await page.getByRole('button', { name: toolsName }).click();
  for (const re of labels) {
    const itemCheck = page.getByRole('menuitemcheckbox', { name: re }).first();
    if (await itemCheck.count()) {
      await itemCheck.click();
      return true;
    }
    const itemPlain = page.getByRole('menuitem', { name: re }).first();
    if (await itemPlain.count()) {
      await itemPlain.click();
      return true;
    }
  }
  return false;
};

let ok = await tryCardButtons();
if (!ok) ok = await tryToolMenu();
if (!ok) {
  // Re-open the tool menu once and retry as a last attempt.
  ok = await tryToolMenu();
}
if (!ok) {
  throw new Error('Music tool entry not found');
}
JS
)"
  pw run-code "$js" >/dev/null
}
|
||||
|
||||
# Submit one prompt, wait for the response to finish, and trigger a download.
# $1: prompt text. It is JSON-encoded by json_escape and spliced into the
# script as a JS string literal; the heredoc delimiter is unquoted so that
# $escaped is expanded.
submit_and_download_one() {
  local track_prompt="$1"
  local escaped
  escaped="$(json_escape "$track_prompt")"
  local js
  js="$(cat <<JS
const prompt = $escaped;
const input = page.getByRole('textbox', { name: /为 Gemini 输入提示|Enter a prompt/i }).first();
await input.click();
await input.fill(prompt);
await input.press('Enter');

const stopBtn = page.getByRole('button', { name: /停止回答|Stop response/i }).first();
await stopBtn.waitFor({ state: 'visible', timeout: 15000 }).catch(() => {});
await stopBtn.waitFor({ state: 'hidden', timeout: 240000 });

const downloadBtn = page.getByRole('button', { name: /下载音乐作品|Download music/i }).last();
await downloadBtn.click();

const mp3Item = page.getByRole('menuitem', { name: /纯音频|MP3/i }).first();
if (await mp3Item.count()) {
  await mp3Item.click();
} else {
  const anyItem = page.getByRole('menuitem').first();
  if (await anyItem.count()) await anyItem.click();
}

await page.waitForTimeout(1200);
JS
)"
  # The wrapper's own output is not needed here.
  pw run-code "$js" >/dev/null
}
|
||||
|
||||
# Create the output directory and record the start time; the collector is
# later given this timestamp via --since.
mkdir -p "$TARGET"
start_ts="$(python3 -c 'import time; print(time.time())')"

# Open Gemini, headed unless --no-headed was given, then take a snapshot.
open_args=("https://gemini.google.com/app")
if [[ "$HEADED" -eq 1 ]]; then
  open_args+=(--headed)
fi
pw open "${open_args[@]}" >/dev/null
pw snapshot >/dev/null

# Stop with a distinct status when the user still has to log in.
if is_login_required; then
  echo "Gemini is not logged in. Please log in at https://gemini.google.com/app and rerun." >&2
  exit 2
fi

enter_music_tool

# Generate the tracks one by one; with several tracks each prompt gets a
# variation note appended on its own line.
for ((i=1; i<=COUNT; i++)); do
  if [[ "$COUNT" -gt 1 ]]; then
    current_prompt="${PROMPT}"$'\n'"变体要求:这是第 $i / $COUNT 首。保持风格一致,但旋律和节奏细节需要变化。"
  else
    current_prompt="$PROMPT"
  fi
  submit_and_download_one "$current_prompt"
done

# Hand over to the collector script with this run's parameters.
collect_args=(
  --target "$TARGET"
  --since "$start_ts"
  --expected-count "$COUNT"
  --limit "$COUNT"
  --prefix "gemini-music"
  --prompt "$PROMPT"
)
python3 "$COLLECT_SCRIPT" "${collect_args[@]}"
|
||||
Reference in New Issue
Block a user