Monorepo for Tangled
tangled.org
1use std::collections::HashMap;
2
3use serde::Deserialize;
4use worker::*;
5
6/// The JSON value stored in Workers KV, keyed by domain.
7///
8/// Example KV entry:
9/// key: "foo.example.com"
10/// value: {"did": "did:plc:...",
11/// "repos": {"my_repo": {"rkey": "3lk...", "is_index": true},
12/// "other_repo": {"rkey": "3ll...", "is_index": false}}}
13///
14/// The is_index flag on each entry indicates whether it is the index site
15/// for the domain (true) or a sub-path site (false). At most one repo may
16/// be true. The rkey identifies the {did}/{rkey}/ prefix in R2 where the
17/// site's objects live.
18#[derive(Deserialize)]
19struct DomainMapping {
20 #[serde(default)]
21 did: String,
22 /// repo name → entry
23 #[serde(default)]
24 repos: HashMap<String, RepoEntry>,
25}
26
27/// Deserialises from either {"rkey": "...", "is_index": bool} (new shape)
28/// or a bare bool (old shape, where the map key itself was the rkey).
29#[derive(Deserialize)]
30#[serde(untagged)]
31enum RepoEntry {
32 New {
33 rkey: String,
34 #[serde(default)]
35 is_index: bool,
36 },
37 Legacy(bool),
38}
39
40impl RepoEntry {
41 fn is_index(&self) -> bool {
42 match self {
43 RepoEntry::New { is_index, .. } => *is_index,
44 RepoEntry::Legacy(b) => *b,
45 }
46 }
47
48 /// Returns the rkey, falling back to the map key (name) for the legacy
49 /// shape where the key itself was the rkey.
50 fn rkey<'a>(&'a self, name: &'a str) -> &'a str {
51 match self {
52 RepoEntry::New { rkey, .. } => rkey.as_str(),
53 RepoEntry::Legacy(_) => name,
54 }
55 }
56}
57
58impl DomainMapping {
59 /// Returns the (name, entry) pair for the index site, if any.
60 fn index_repo(&self) -> Option<(&str, &RepoEntry)> {
61 self.repos.iter().find_map(|(name, entry)| {
62 if entry.is_index() {
63 Some((name.as_str(), entry))
64 } else {
65 None
66 }
67 })
68 }
69}
70
/// Build the R2 object key for a given did/rkey and intra-site path.
///
/// The key always begins with `{did}/{rkey}/`. Leading slashes are removed
/// from `site_path` before it is appended. When nothing remains after that
/// (`""`, `"/"`, `"//"`, ...) or the remainder ends with `/`, the path is
/// treated as a directory and `index.html` is appended.
///
/// `site_path` should start with a `/` or be empty.
fn r2_key(did: &str, rkey: &str, site_path: &str) -> String {
    let relative = site_path.trim_start_matches('/');

    // An empty remainder is the site root; a trailing slash is a directory.
    // Both resolve to that directory's index document.
    let index_suffix = if relative.is_empty() || relative.ends_with('/') {
        "index.html"
    } else {
        ""
    };

    format!("{}/{}/{}{}", did, rkey, relative, index_suffix)
}
86
87/// Fetch an object from R2, falling back to appending /index.html if the
88/// key looks like a directory (no file extension in the last segment).
89async fn fetch_from_r2(bucket: &Bucket, key: &str) -> Result<Option<Object>> {
90 if let Some(obj) = bucket.get(key).execute().await? {
91 return Ok(Some(obj));
92 }
93
94 let last_segment = key.rsplit('/').next().unwrap_or(key);
95 if !last_segment.contains('.') {
96 let index_key = format!("{}/index.html", key.trim_end_matches('/'));
97 if let Some(obj) = bucket.get(&index_key).execute().await? {
98 return Ok(Some(obj));
99 }
100 }
101
102 Ok(None)
103}
104
105/// Build a Response from an R2 Object, forwarding the content-type header.
106fn response_from_object(obj: Object) -> Result<Response> {
107 let content_type = obj
108 .http_metadata()
109 .content_type
110 .unwrap_or_else(|| "application/octet-stream".to_string());
111
112 let body = obj
113 .body()
114 .ok_or_else(|| Error::RustError("empty R2 body".into()))?;
115 let mut resp = Response::from_body(body.response_body()?)?;
116 resp.headers_mut().set("Content-Type", &content_type)?;
117 resp.headers_mut()
118 .set("Cache-Control", "public, max-age=60")?;
119 Ok(resp)
120}
121
/// Reports whether `path` begins with one of the path prefixes that this
/// worker does not serve itself.
fn is_excluded(path: &str) -> bool {
    const EXCLUDED_PREFIXES: &[&str] = &["/.well-known/atproto-did"];

    EXCLUDED_PREFIXES
        .iter()
        .copied()
        .any(|prefix| path.starts_with(prefix))
}
126
127#[event(fetch)]
128async fn fetch(req: Request, env: Env, _ctx: Context) -> Result<Response> {
129 let kv = env.kv("SITES")?;
130 let bucket = env.bucket("SITES_BUCKET")?;
131
132 // Extract host, stripping any port.
133 let host = req.headers().get("host")?.unwrap_or_default();
134 let host = host.split(':').next().unwrap_or("").to_string();
135
136 if host.is_empty() {
137 return Response::error("Bad Request: missing host", 400);
138 }
139
140 let url = req.url()?;
141 let path = url.path();
142
143 if is_excluded(path) {
144 return Fetch::Request(req).send().await;
145 }
146
147 // Single KV lookup for the whole domain.
148 let mapping = match kv.get(&host).text().await? {
149 Some(raw) => match serde_json::from_str::<DomainMapping>(&raw) {
150 Ok(m) => m,
151 Err(_) => return Response::error("Internal Error: bad mapping", 500),
152 },
153 None => return Response::error("site not found!", 404),
154 };
155
156 let path = url.path(); // always starts with "/"
157
158 // First path segment, e.g. "my_repo" from "/my_repo/page.html"
159 let first_segment = path
160 .trim_start_matches('/')
161 .split('/')
162 .next()
163 .unwrap_or("")
164 .to_string();
165
166 // 1. sub-path site
167 // If the first path segment matches a non-index repo, serve from it.
168 if !first_segment.is_empty() {
169 if let Some(entry) = mapping.repos.get(&first_segment) {
170 if !entry.is_index() {
171 // Strip the leading "/{first_segment}" to get the intra-site path.
172 let site_path = path
173 .trim_start_matches('/')
174 .trim_start_matches(&first_segment)
175 .to_string();
176
177 let key = r2_key(&mapping.did, entry.rkey(&first_segment), &site_path);
178 return match fetch_from_r2(&bucket, &key).await? {
179 Some(obj) => response_from_object(obj),
180 None => Response::error("Not Found", 404),
181 };
182 }
183 }
184 }
185
186 // 2. index site
187 // Fall back to the repo marked as the index site, serving the full path.
188 if let Some((name, entry)) = mapping.index_repo() {
189 let key = r2_key(&mapping.did, entry.rkey(name), path);
190 return match fetch_from_r2(&bucket, &key).await? {
191 Some(obj) => response_from_object(obj),
192 None => Response::error("Not Found", 404),
193 };
194 }
195
196 Response::error("Not Found", 404)
197}