This repository has no description
1from __future__ import annotations
2
3import os
4from typing import Any
5from urllib.parse import urlparse
6
7import httpx
8
9from db import connect, set_crawl_state, upsert_knot
10from progress import banner, log, step
11
# XRPC method requested from each knot to read its version payload.
KNOT_VERSION_METHOD = "sh.tangled.knot.version"
# XRPC method requested from each knot to read its owner payload.
KNOT_OWNER_METHOD = "sh.tangled.owner"
# Hostnames probed when TANGLED_KNOT_SEEDS is not set.
DEFAULT_SEEDS = ["knot1.tangled.sh"]
# Timeout handed to the httpx client used for probing (httpx timeouts are in seconds).
PROBE_TIMEOUT = 15.0
16
17
18def _normalize_hostname(value: str) -> str:
19 value = value.strip()
20 if value.startswith("http://") or value.startswith("https://"):
21 value = urlparse(value).netloc or value
22 return value.rstrip("/")
23
24
def knot_seeds() -> list[str]:
    """Return the knot hostnames to probe, as configured via the environment.

    Resolution order:

    * ``TANGLED_KNOT_SEEDS`` — when non-blank, its comma-separated entries are
      normalized and returned as-is (nothing else is consulted).
    * Otherwise the list starts from ``DEFAULT_SEEDS``, is extended with
      ``knot2..knotN.tangled.sh`` when ``TANGLED_KNOT_PROBE_MAX`` is ``N``,
      and finally with the comma-separated ``TANGLED_KNOT_EXTRA`` entries.

    Entries that normalize to an empty string are dropped and duplicates are
    removed while preserving first-seen order. A blank or non-numeric
    ``TANGLED_KNOT_PROBE_MAX`` is treated as ``0`` (auto-discovery off).

    Returns:
        Ordered list of unique hostnames; may be empty if
        ``TANGLED_KNOT_SEEDS`` contains only separators or slashes.
    """
    raw = os.getenv("TANGLED_KNOT_SEEDS", "")
    if raw.strip():
        normalized = (_normalize_hostname(part) for part in raw.split(","))
        # dict.fromkeys de-duplicates while keeping the configured order.
        return list(dict.fromkeys(host for host in normalized if host))

    seeds = list(DEFAULT_SEEDS)

    # Optional auto-discovery: probe knot1..knotN (off by default).
    try:
        max_auto = int(os.getenv("TANGLED_KNOT_PROBE_MAX", "0").strip() or "0")
    except ValueError:
        # A malformed value should not abort the crawl; behave as if unset.
        max_auto = 0
    for n in range(2, max_auto + 1):
        host = f"knot{n}.tangled.sh"
        if host not in seeds:
            seeds.append(host)

    extra = os.getenv("TANGLED_KNOT_EXTRA", "")
    for part in extra.split(","):
        host = _normalize_hostname(part)
        if host and host not in seeds:
            seeds.append(host)

    return seeds
44
45
46def _xrpc_url(hostname: str, method: str) -> str:
47 return f"https://{hostname}/xrpc/{method}"
48
49
def probe_knot(client: httpx.Client, hostname: str) -> dict[str, Any]:
    """Probe one knot over XRPC and summarise what it reported.

    The version endpoint is requested first; a non-200 status or a payload
    that is not a JSON object ends the probe with ``probe_error`` set. The
    owner endpoint is then requested; a non-200 status or unexpected payload
    shape there simply leaves ``owner_did`` / ``owner_raw`` as ``None``.

    Transport errors, invalid URLs and undecodable JSON never propagate: they
    are captured in ``probe_error`` and the knot is reported as unreachable
    (fields filled in before the failure are kept).

    Args:
        client: HTTP client used for both requests.
        hostname: Knot host to probe (no scheme).

    Returns:
        A dict with the keys ``hostname``, ``reachable``, ``owner_did``,
        ``version``, ``capabilities``, ``version_raw``, ``owner_raw`` and
        ``probe_error``.
    """
    result: dict[str, Any] = {
        "hostname": hostname,
        "reachable": False,
        "owner_did": None,
        "version": None,
        "capabilities": None,
        "version_raw": None,
        "owner_raw": None,
        "probe_error": None,
    }

    try:
        version_resp = client.get(_xrpc_url(hostname, KNOT_VERSION_METHOD))
        if version_resp.status_code != 200:
            result["probe_error"] = f"{KNOT_VERSION_METHOD} HTTP {version_resp.status_code}"
            return result

        version_raw = version_resp.json()
        # A server may answer 200 with valid JSON that is not an object
        # (list, string, null); calling .get() on that would crash the stage.
        if not isinstance(version_raw, dict):
            result["probe_error"] = (
                f"{KNOT_VERSION_METHOD} returned non-object JSON "
                f"({type(version_raw).__name__})"
            )
            return result

        result["version_raw"] = version_raw
        result["version"] = version_raw.get("version")
        caps = version_raw.get("capabilities")
        if isinstance(caps, list):
            result["capabilities"] = [str(c) for c in caps]

        owner_resp = client.get(_xrpc_url(hostname, KNOT_OWNER_METHOD))
        if owner_resp.status_code == 200:
            owner_raw = owner_resp.json()
            # Owner info is best-effort: ignore payloads of the wrong shape.
            if isinstance(owner_raw, dict):
                result["owner_raw"] = owner_raw
                owner = owner_raw.get("owner")
                if isinstance(owner, str):
                    result["owner_did"] = owner

        result["reachable"] = True
        return result
    except (httpx.HTTPError, httpx.InvalidURL) as exc:
        # InvalidURL does not derive from HTTPError, so it is listed explicitly
        # to keep a malformed seed hostname from aborting the whole run.
        result["probe_error"] = str(exc)
        return result
    except ValueError as exc:
        result["probe_error"] = f"invalid JSON: {exc}"
        return result
91
92
def run_stage1(dsn: str) -> dict[str, int]:
    """Probe every seed knot, persist each outcome, and report the tallies.

    The crawl state ``stage1:knots`` is set to ``running`` before probing and
    to ``complete`` (with the tallies) afterwards; each probe result is
    written through ``upsert_knot`` and the connection is committed once at
    the end.

    Args:
        dsn: Connection string handed to ``connect``.

    Returns:
        A dict with the integer counters ``reachable`` and ``unreachable``.
    """
    tag = "stage 1"
    state_key = "stage1:knots"
    # Probe fields forwarded verbatim to upsert_knot as keyword arguments.
    persisted_fields = (
        "reachable",
        "owner_did",
        "version",
        "capabilities",
        "version_raw",
        "owner_raw",
        "probe_error",
    )

    banner("STAGE 1 — Probe knot servers (infrastructure)")
    for intro_line in (
        "Knots are git host servers — NOT the source code itself.",
        "This stage checks which knots are alive and records their version/owner.",
        "Actual repo code comes in Stage 6 (git log/tree/blob XRPC).",
    ):
        log(tag, intro_line)

    seeds = knot_seeds()
    total = len(seeds)
    log(tag, f"Probing {total} knot hostname(s): {', '.join(seeds)}")
    if os.getenv("TANGLED_KNOT_PROBE_MAX", "0") == "0":
        tip = (
            "Tip: set TANGLED_KNOT_SEEDS=knot1.tangled.sh,custom.knot.example "
            "or TANGLED_KNOT_PROBE_MAX=5 to auto-try knot2..knot5."
        )
        log(tag, tip)

    stats = {"reachable": 0, "unreachable": 0}

    with httpx.Client(timeout=PROBE_TIMEOUT, follow_redirects=True) as client:
        with connect(dsn) as conn:
            set_crawl_state(conn, key=state_key, status="running", meta={"seeds": seeds})

            for position, host in enumerate(seeds, start=1):
                step(tag, position, total, f"Probing {host} ...")
                outcome = probe_knot(client, host)

                upsert_knot(
                    conn,
                    hostname=host,
                    **{field: outcome[field] for field in persisted_fields},
                )

                if not outcome["reachable"]:
                    stats["unreachable"] += 1
                    log(tag, f" FAIL {host} {outcome['probe_error']}")
                    continue

                stats["reachable"] += 1
                capabilities = outcome["capabilities"] or []
                log(
                    tag,
                    f" OK {host} version={outcome['version']} "
                    f"owner={outcome['owner_did'] or '?'} capabilities={capabilities}",
                )

            set_crawl_state(
                conn,
                key=state_key,
                status="complete",
                meta={"seeds": seeds, **stats},
            )
            conn.commit()

    log(tag, "Done.")
    log(tag, f" reachable={stats['reachable']} unreachable={stats['unreachable']}")
    if stats["reachable"] == 0:
        log(tag, "WARNING: no reachable knots — check network or seed hostnames.")

    return stats