This repository has no description
0

Configure Feed

Select the types of activity you want to include in your feed.

at main 6.5 kB View raw
1"""Publish AI-solve questionnaires to the knot-hosted git repo (vectorseachdb). 2 3The generation job dual-writes: it upserts Postgres (agent/questionnaire_store.py) 4AND, when QUESTIONNAIRE_PUBLISH_REPO is set, publishes the questionnaire as a single 5JSON file in the embeddings repo on the knot: 6 7 questionnaires/<did>/<rkey>.json # one file per issue, fetched per-item 8 9Design choices that make this safe in an ephemeral, possibly-concurrent Cloud Run job: 10- **Sparse + partial clone** (`--filter=blob:none --sparse`, sparse-set `questionnaires`) 11 so we never download the ~18 MB embedding matrices that share this repo. 12- **Per-issue unique path** → concurrent jobs touch different files; no content conflicts. 13- **`index.json` is NOT written here** (it would conflict across concurrent jobs) — it's 14 rebuilt by scraper/export_questionnaires.py. Consumers can read files by path directly. 15- **Push with `pull --rebase` + retry** to tolerate the embeddings export pushing too. 16 17Config (env): 18 QUESTIONNAIRE_REPO_GIT_URL e.g. git@tangled.org:did:plc:vg4msk54xucet6of2rdrgahe (required) 19 QUESTIONNAIRE_REPO_DIR local checkout dir (default /tmp/qrepo) 20 QUESTIONNAIRE_REPO_BRANCH default "main" 21 QUESTIONNAIRE_PUBLISH_PUSH "0" to commit but skip push (local testing); default "1" 22 QUESTIONNAIRE_SSH_KEY optional path to the deploy key (added to GIT_SSH_COMMAND) 23 GIT_SSH_COMMAND respected if already set 24""" 25 26from __future__ import annotations 27 28import json 29import os 30import subprocess 31from pathlib import Path 32from typing import Any 33 34_PUSH_RETRIES = 4 35 36 37def publishing_enabled() -> bool: 38 return os.getenv("QUESTIONNAIRE_PUBLISH_REPO", "").strip().lower() in ("1", "true", "yes") 39 40 41def issue_uri_to_relpath(issue_uri: str) -> str: 42 """at://<did>/sh.tangled.repo.issue/<rkey> -> questionnaires/<did>/<rkey>.json 43 (must match scraper/export_questionnaires.py).""" 44 rest = issue_uri[len("at://"):] if issue_uri.startswith("at://") else issue_uri 45 parts = rest.split("/") 46 return f"questionnaires/{parts[0]}/{parts[-1]}.json" 47 48 49def _resolve_ssh_key() -> str | None: 50 """Return a path to a usable private key, or None. 51 52 Prefers QUESTIONNAIRE_SSH_KEY (a path). Otherwise, if QUESTIONNAIRE_SSH_KEY_CONTENTS 53 is set (e.g. a Secret Manager env var in Cloud Run), materialize it to a 0600 temp 54 file — secret *volume* mounts are world-readable, which ssh rejects, so env-injection 55 + chmod is the robust path.""" 56 path = os.getenv("QUESTIONNAIRE_SSH_KEY", "").strip() 57 if path and Path(path).exists(): 58 return path 59 contents = os.getenv("QUESTIONNAIRE_SSH_KEY_CONTENTS", "") 60 if contents.strip(): 61 dest = Path(os.getenv("QUESTIONNAIRE_REPO_DIR", "/tmp/qrepo")).parent / "qrepo_ssh_key" 62 body = contents if contents.endswith("\n") else contents + "\n" 63 dest.write_text(body) 64 dest.chmod(0o600) 65 return str(dest) 66 return None 67 68 69def _git_env() -> dict[str, str]: 70 env = dict(os.environ) 71 if "GIT_SSH_COMMAND" not in env: 72 cmd = "ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=30" 73 key = _resolve_ssh_key() 74 if key: 75 cmd += f" -i {key} -o IdentitiesOnly=yes" 76 env["GIT_SSH_COMMAND"] = cmd 77 return env 78 79 80def _git(repo: Path, *args: str) -> str: 81 out = subprocess.run( 82 ["git", *args], cwd=str(repo), env=_git_env(), 83 capture_output=True, text=True, 84 ) 85 if out.returncode != 0: 86 raise RuntimeError(f"git {' '.join(args)} failed: {out.stderr.strip() or out.stdout.strip()}") 87 return out.stdout 88 89 90def _ensure_checkout(url: str, repo: Path, branch: str) -> None: 91 if (repo / ".git").is_dir(): 92 _git(repo, "fetch", "origin", branch) 93 _git(repo, "checkout", branch) 94 _git(repo, "reset", "--hard", f"origin/{branch}") 95 return 96 repo.parent.mkdir(parents=True, exist_ok=True) 97 subprocess.run( 98 ["git", "clone", "--filter=blob:none", "--sparse", "--branch", branch, url, str(repo)], 99 env=_git_env(), capture_output=True, text=True, check=True, 100 ) 101 _git(repo, "sparse-checkout", "set", "questionnaires") 102 103 104def _file_record(issue_uri: str, payload: dict[str, Any], created_at, updated_at) -> str: 105 rec = { 106 "issue_uri": issue_uri, 107 "version": payload.get("version") if isinstance(payload, dict) else None, 108 "created_at": created_at.isoformat() if hasattr(created_at, "isoformat") else created_at, 109 "updated_at": updated_at.isoformat() if hasattr(updated_at, "isoformat") else updated_at, 110 "payload": payload, 111 } 112 return json.dumps(rec, ensure_ascii=False, indent=2) + "\n" 113 114 115def publish_to_repo(issue_uri: str, payload: dict[str, Any], created_at=None, updated_at=None) -> str: 116 """Write the questionnaire file, commit, and (unless disabled) push. Returns the 117 relative path written. Raises on failure — callers treat publishing as best-effort.""" 118 url = os.getenv("QUESTIONNAIRE_REPO_GIT_URL", "").strip() 119 if not url: 120 raise RuntimeError("QUESTIONNAIRE_REPO_GIT_URL is not set") 121 repo = Path(os.getenv("QUESTIONNAIRE_REPO_DIR", "/tmp/qrepo")).expanduser() 122 branch = os.getenv("QUESTIONNAIRE_REPO_BRANCH", "main") 123 do_push = os.getenv("QUESTIONNAIRE_PUBLISH_PUSH", "1").strip().lower() not in ("0", "false", "no") 124 125 _ensure_checkout(url, repo, branch) 126 127 rel = issue_uri_to_relpath(issue_uri) 128 path = repo / rel 129 path.parent.mkdir(parents=True, exist_ok=True) 130 path.write_text(_file_record(issue_uri, payload, created_at, updated_at), encoding="utf-8") 131 132 _git(repo, "add", rel) 133 if not _git(repo, "status", "--porcelain").strip(): 134 return rel # no change (identical content) — nothing to commit 135 _git(repo, "-c", "user.name=tangled-questionnaire", "-c", "user.email=bot@stuhi.org", 136 "commit", "-m", f"questionnaire: {issue_uri}") 137 138 if not do_push: 139 return rel 140 last_err: Exception | None = None 141 for _ in range(_PUSH_RETRIES): 142 try: 143 _git(repo, "push", "origin", branch) 144 return rel 145 except RuntimeError as e: # non-fast-forward (a concurrent push) — rebase + retry 146 last_err = e 147 try: 148 _git(repo, "pull", "--rebase", "origin", branch) 149 except RuntimeError as pe: 150 last_err = pe 151 break 152 raise RuntimeError(f"push failed after retries: {last_err}")