This repository has no description
1"""Publish AI-solve questionnaires to the knot-hosted git repo (vectorseachdb).
2
3The generation job dual-writes: it upserts Postgres (agent/questionnaire_store.py)
4AND, when QUESTIONNAIRE_PUBLISH_REPO is set, publishes the questionnaire as a single
5JSON file in the embeddings repo on the knot:
6
7 questionnaires/<did>/<rkey>.json # one file per issue, fetched per-item
8
9Design choices that make this safe in an ephemeral, possibly-concurrent Cloud Run job:
10- **Sparse + partial clone** (`--filter=blob:none --sparse`, sparse-set `questionnaires`)
11 so we never download the ~18 MB embedding matrices that share this repo.
12- **Per-issue unique path** → concurrent jobs touch different files; no content conflicts.
13- **`index.json` is NOT written here** (it would conflict across concurrent jobs) — it's
14 rebuilt by scraper/export_questionnaires.py. Consumers can read files by path directly.
15- **Push with `pull --rebase` + retry** to tolerate the embeddings export pushing too.
16
17Config (env):
18 QUESTIONNAIRE_REPO_GIT_URL e.g. git@tangled.org:did:plc:vg4msk54xucet6of2rdrgahe (required)
19 QUESTIONNAIRE_REPO_DIR local checkout dir (default /tmp/qrepo)
20 QUESTIONNAIRE_REPO_BRANCH default "main"
21 QUESTIONNAIRE_PUBLISH_PUSH "0" to commit but skip push (local testing); default "1"
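22 QUESTIONNAIRE_SSH_KEY optional path to the deploy key (added to GIT_SSH_COMMAND); when it is unset or the path does not exist, the key text in QUESTIONNAIRE_SSH_KEY_CONTENTS is written to a 0600 file and used instead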
23 GIT_SSH_COMMAND respected if already set
24"""
25
26from __future__ import annotations
27
28import json
29import os
30import subprocess
31from pathlib import Path
32from typing import Any
33
# Upper bound on the number of `git push` attempts made by publish_to_repo
# before it gives up and raises.
_PUSH_RETRIES = 4
35
36
def publishing_enabled() -> bool:
    """Report whether the QUESTIONNAIRE_PUBLISH_REPO env flag opts in to publishing.

    Only "1", "true" and "yes" count as enabled (case-insensitive, surrounding
    whitespace ignored); an unset variable or any other value means disabled.
    """
    flag = os.getenv("QUESTIONNAIRE_PUBLISH_REPO", "")
    normalized = flag.strip().lower()
    return normalized in ("1", "true", "yes")
39
40
def issue_uri_to_relpath(issue_uri: str) -> str:
    """Map an issue AT-URI to its repo-relative questionnaire file path.

    at://<did>/sh.tangled.repo.issue/<rkey> -> questionnaires/<did>/<rkey>.json
    (must match scraper/export_questionnaires.py).

    The first "/"-separated segment is taken as <did> and the last as <rkey>;
    a missing "at://" prefix is tolerated.

    Raises:
        ValueError: if <did> or <rkey> is empty, "." or "..". Such segments
            would produce a path outside questionnaires/<did>/ (or a nameless
            ".json" file) once joined onto the checkout directory.
    """
    scheme = "at://"
    rest = issue_uri[len(scheme):] if issue_uri.startswith(scheme) else issue_uri
    parts = rest.split("/")
    did, rkey = parts[0], parts[-1]
    # The result is joined onto the checkout dir and written to, so refuse
    # segments that would climb out of (or collapse) the intended directory.
    for label, segment in (("did", did), ("rkey", rkey)):
        if segment in ("", ".", ".."):
            raise ValueError(f"invalid {label} {segment!r} in issue URI {issue_uri!r}")
    return f"questionnaires/{did}/{rkey}.json"
47
48
49def _resolve_ssh_key() -> str | None:
50 """Return a path to a usable private key, or None.
51
52 Prefers QUESTIONNAIRE_SSH_KEY (a path). Otherwise, if QUESTIONNAIRE_SSH_KEY_CONTENTS
53 is set (e.g. a Secret Manager env var in Cloud Run), materialize it to a 0600 temp
54 file — secret *volume* mounts are world-readable, which ssh rejects, so env-injection
55 + chmod is the robust path."""
56 path = os.getenv("QUESTIONNAIRE_SSH_KEY", "").strip()
57 if path and Path(path).exists():
58 return path
59 contents = os.getenv("QUESTIONNAIRE_SSH_KEY_CONTENTS", "")
60 if contents.strip():
61 dest = Path(os.getenv("QUESTIONNAIRE_REPO_DIR", "/tmp/qrepo")).parent / "qrepo_ssh_key"
62 body = contents if contents.endswith("\n") else contents + "\n"
63 dest.write_text(body)
64 dest.chmod(0o600)
65 return str(dest)
66 return None
67
68
69def _git_env() -> dict[str, str]:
70 env = dict(os.environ)
71 if "GIT_SSH_COMMAND" not in env:
72 cmd = "ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=30"
73 key = _resolve_ssh_key()
74 if key:
75 cmd += f" -i {key} -o IdentitiesOnly=yes"
76 env["GIT_SSH_COMMAND"] = cmd
77 return env
78
79
def _git(repo: Path, *args: str) -> str:
    """Run ``git <args>`` with *repo* as working directory and return stdout.

    The subprocess gets the environment from ``_git_env()`` and its output is
    captured as text.

    Raises:
        RuntimeError: on a non-zero exit status; the message carries git's
            stderr, or its stdout when stderr is empty.
    """
    command = ["git"]
    command.extend(args)
    proc = subprocess.run(
        command,
        cwd=str(repo),
        env=_git_env(),
        capture_output=True,
        text=True,
    )
    if proc.returncode == 0:
        return proc.stdout
    detail = proc.stderr.strip() or proc.stdout.strip()
    raise RuntimeError(f"git {' '.join(args)} failed: {detail}")
88
89
def _ensure_checkout(url: str, repo: Path, branch: str) -> None:
    """Make *repo* a checkout of *branch* that matches the remote.

    If *repo* already holds a git checkout it is fetched, switched to *branch*
    and hard-reset to ``origin/<branch>`` (discarding any local commits).
    Otherwise the repository is cloned sparse + partial
    (``--filter=blob:none --sparse``) and the sparse set is narrowed to the
    ``questionnaires`` directory.

    Raises:
        RuntimeError: if any git command fails; the message includes git's
            own error output.
    """
    if (repo / ".git").is_dir():
        _git(repo, "fetch", "origin", branch)
        _git(repo, "checkout", branch)
        _git(repo, "reset", "--hard", f"origin/{branch}")
        return

    repo.parent.mkdir(parents=True, exist_ok=True)
    # The clone cannot go through _git() because its cwd (the checkout) does
    # not exist yet. Report failures the same way though: a RuntimeError that
    # carries git's stderr, rather than a CalledProcessError that hides it.
    clone = subprocess.run(
        ["git", "clone", "--filter=blob:none", "--sparse", "--branch", branch, url, str(repo)],
        env=_git_env(), capture_output=True, text=True,
    )
    if clone.returncode != 0:
        raise RuntimeError(f"git clone failed: {clone.stderr.strip() or clone.stdout.strip()}")
    _git(repo, "sparse-checkout", "set", "questionnaires")
102
103
104def _file_record(issue_uri: str, payload: dict[str, Any], created_at, updated_at) -> str:
105 rec = {
106 "issue_uri": issue_uri,
107 "version": payload.get("version") if isinstance(payload, dict) else None,
108 "created_at": created_at.isoformat() if hasattr(created_at, "isoformat") else created_at,
109 "updated_at": updated_at.isoformat() if hasattr(updated_at, "isoformat") else updated_at,
110 "payload": payload,
111 }
112 return json.dumps(rec, ensure_ascii=False, indent=2) + "\n"
113
114
def publish_to_repo(issue_uri: str, payload: dict[str, Any], created_at=None, updated_at=None) -> str:
    """Write the questionnaire file, commit, and (unless disabled) push.

    Configuration is read from the environment: QUESTIONNAIRE_REPO_GIT_URL
    (required), QUESTIONNAIRE_REPO_DIR (default /tmp/qrepo),
    QUESTIONNAIRE_REPO_BRANCH (default "main") and QUESTIONNAIRE_PUBLISH_PUSH
    ("0"/"false"/"no" commits without pushing).

    Args:
        issue_uri: AT-URI of the issue; determines the file path.
        payload: questionnaire body stored under "payload" in the file.
        created_at, updated_at: timestamps stored in the file (datetime-like
            or already-formatted values).

    Returns:
        The repo-relative path written. When the file content is unchanged no
        commit or push is made.

    Raises:
        RuntimeError: if the URL is not configured, a git command fails, or
            the push still fails after the retries. Callers treat publishing
            as best-effort.
    """
    url = os.getenv("QUESTIONNAIRE_REPO_GIT_URL", "").strip()
    if not url:
        raise RuntimeError("QUESTIONNAIRE_REPO_GIT_URL is not set")
    repo = Path(os.getenv("QUESTIONNAIRE_REPO_DIR", "/tmp/qrepo")).expanduser()
    # An empty or whitespace-only value would produce an empty branch name.
    branch = os.getenv("QUESTIONNAIRE_REPO_BRANCH", "").strip() or "main"
    do_push = os.getenv("QUESTIONNAIRE_PUBLISH_PUSH", "1").strip().lower() not in ("0", "false", "no")

    # Pure string work; do it before touching the network.
    rel = issue_uri_to_relpath(issue_uri)

    _ensure_checkout(url, repo, branch)

    path = repo / rel
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(_file_record(issue_uri, payload, created_at, updated_at), encoding="utf-8")

    _git(repo, "add", rel)
    # Scope the check to our file: unrelated untracked leftovers in a reused
    # checkout must not trigger a commit that has nothing staged.
    if not _git(repo, "status", "--porcelain", "--", rel).strip():
        return rel  # no change (identical content) — nothing to commit

    # The job may run where no git identity is configured, so pass one
    # explicitly to every command that creates commits.
    identity = ("-c", "user.name=tangled-questionnaire", "-c", "user.email=bot@stuhi.org")
    _git(repo, *identity, "commit", "-m", f"questionnaire: {issue_uri}")

    if not do_push:
        return rel

    last_err: Exception | None = None
    for _ in range(_PUSH_RETRIES):
        try:
            _git(repo, "push", "origin", branch)
            return rel
        except RuntimeError as e:  # non-fast-forward (a concurrent push) — rebase + retry
            last_err = e
        try:
            # Rebasing re-creates the local commit, so it needs the identity too.
            _git(repo, *identity, "pull", "--rebase", "origin", branch)
        except RuntimeError as pe:
            last_err = pe
            # Don't leave the reused checkout stuck in a half-finished rebase.
            try:
                _git(repo, "rebase", "--abort")
            except RuntimeError:
                pass  # no rebase was in progress — nothing to clean up
            break
    raise RuntimeError(f"push failed after retries: {last_err}") from last_err
152 raise RuntimeError(f"push failed after retries: {last_err}")