This repository has no description
0

Configure Feed

Select the types of activity you want to include in your feed.

at main 5.3 kB View raw
1from __future__ import annotations 2 3import os 4from typing import Any 5from urllib.parse import urlparse 6 7import httpx 8 9from db import connect, set_crawl_state, upsert_knot 10from progress import banner, log, step 11 12KNOT_VERSION_METHOD = "sh.tangled.knot.version" 13KNOT_OWNER_METHOD = "sh.tangled.owner" 14DEFAULT_SEEDS = ["knot1.tangled.sh"] 15PROBE_TIMEOUT = 15.0 16 17 18def _normalize_hostname(value: str) -> str: 19 value = value.strip() 20 if value.startswith("http://") or value.startswith("https://"): 21 value = urlparse(value).netloc or value 22 return value.rstrip("/") 23 24 25def knot_seeds() -> list[str]: 26 raw = os.getenv("TANGLED_KNOT_SEEDS", "") 27 if raw.strip(): 28 return [_normalize_hostname(part) for part in raw.split(",") if part.strip()] 29 30 seeds = list(DEFAULT_SEEDS) 31 32 # Optional auto-discovery: probe knot1..knotN (off by default). 33 max_auto = int(os.getenv("TANGLED_KNOT_PROBE_MAX", "0")) 34 for n in range(2, max_auto + 1): 35 seeds.append(f"knot{n}.tangled.sh") 36 37 extra = os.getenv("TANGLED_KNOT_EXTRA", "") 38 for part in extra.split(","): 39 host = _normalize_hostname(part) 40 if host and host not in seeds: 41 seeds.append(host) 42 43 return seeds 44 45 46def _xrpc_url(hostname: str, method: str) -> str: 47 return f"https://{hostname}/xrpc/{method}" 48 49 50def probe_knot(client: httpx.Client, hostname: str) -> dict[str, Any]: 51 result: dict[str, Any] = { 52 "hostname": hostname, 53 "reachable": False, 54 "owner_did": None, 55 "version": None, 56 "capabilities": None, 57 "version_raw": None, 58 "owner_raw": None, 59 "probe_error": None, 60 } 61 62 try: 63 version_resp = client.get(_xrpc_url(hostname, KNOT_VERSION_METHOD)) 64 if version_resp.status_code != 200: 65 result["probe_error"] = f"{KNOT_VERSION_METHOD} HTTP {version_resp.status_code}" 66 return result 67 68 version_raw = version_resp.json() 69 result["version_raw"] = version_raw 70 result["version"] = version_raw.get("version") 71 caps = version_raw.get("capabilities") 72 if isinstance(caps, list): 73 result["capabilities"] = [str(c) for c in caps] 74 75 owner_resp = client.get(_xrpc_url(hostname, KNOT_OWNER_METHOD)) 76 if owner_resp.status_code == 200: 77 owner_raw = owner_resp.json() 78 result["owner_raw"] = owner_raw 79 owner = owner_raw.get("owner") 80 if isinstance(owner, str): 81 result["owner_did"] = owner 82 83 result["reachable"] = True 84 return result 85 except httpx.HTTPError as exc: 86 result["probe_error"] = str(exc) 87 return result 88 except ValueError as exc: 89 result["probe_error"] = f"invalid JSON: {exc}" 90 return result 91 92 93def run_stage1(dsn: str) -> dict[str, int]: 94 banner("STAGE 1 — Probe knot servers (infrastructure)") 95 log("stage 1", "Knots are git host servers — NOT the source code itself.") 96 log("stage 1", "This stage checks which knots are alive and records their version/owner.") 97 log("stage 1", "Actual repo code comes in Stage 6 (git log/tree/blob XRPC).") 98 99 seeds = knot_seeds() 100 log("stage 1", f"Probing {len(seeds)} knot hostname(s): {', '.join(seeds)}") 101 if os.getenv("TANGLED_KNOT_PROBE_MAX", "0") == "0": 102 log( 103 "stage 1", 104 "Tip: set TANGLED_KNOT_SEEDS=knot1.tangled.sh,custom.knot.example " 105 "or TANGLED_KNOT_PROBE_MAX=5 to auto-try knot2..knot5.", 106 ) 107 108 stats = {"reachable": 0, "unreachable": 0} 109 110 with httpx.Client(timeout=PROBE_TIMEOUT, follow_redirects=True) as client, connect( 111 dsn 112 ) as conn: 113 set_crawl_state(conn, key="stage1:knots", status="running", meta={"seeds": seeds}) 114 115 for i, hostname in enumerate(seeds, start=1): 116 step("stage 1", i, len(seeds), f"Probing {hostname} ...") 117 probe = probe_knot(client, hostname) 118 119 upsert_knot( 120 conn, 121 hostname=hostname, 122 reachable=probe["reachable"], 123 owner_did=probe["owner_did"], 124 version=probe["version"], 125 capabilities=probe["capabilities"], 126 version_raw=probe["version_raw"], 127 owner_raw=probe["owner_raw"], 128 probe_error=probe["probe_error"], 129 ) 130 131 if probe["reachable"]: 132 stats["reachable"] += 1 133 caps = probe["capabilities"] or [] 134 log( 135 "stage 1", 136 f" OK {hostname} version={probe['version']} " 137 f"owner={probe['owner_did'] or '?'} capabilities={caps}", 138 ) 139 else: 140 stats["unreachable"] += 1 141 log("stage 1", f" FAIL {hostname} {probe['probe_error']}") 142 143 set_crawl_state( 144 conn, 145 key="stage1:knots", 146 status="complete", 147 meta={"seeds": seeds, **stats}, 148 ) 149 conn.commit() 150 151 log("stage 1", "Done.") 152 log( 153 "stage 1", 154 f" reachable={stats['reachable']} unreachable={stats['unreachable']}", 155 ) 156 if stats["reachable"] == 0: 157 log("stage 1", "WARNING: no reachable knots — check network or seed hostnames.") 158 159 return stats