Monorepo for Tangled
tangled.org
1use std::collections::HashMap;
2
3use serde::Deserialize;
4use worker::*;
5
6/// The JSON value stored in Workers KV, keyed by domain.
7///
8/// Example KV entry:
9/// key: "foo.example.com"
10/// value: {"did": "did:plc:...",
11/// "repos": {"my_repo": {"rkey": "3lk...", "is_index": true},
12/// "other_repo": {"rkey": "3ll...", "is_index": false}}}
13///
14/// The is_index flag on each entry indicates whether it is the index site
15/// for the domain (true) or a sub-path site (false). At most one repo may
16/// be true. The rkey identifies the {did}/{rkey}/ prefix in R2 where the
17/// site's objects live.
18#[derive(Deserialize)]
19struct DomainMapping {
20 #[serde(default)]
21 did: String,
22 /// repo name → entry
23 #[serde(default)]
24 repos: HashMap<String, RepoEntry>,
25}
26
27/// Deserialises from either {"rkey": "...", "is_index": bool} (new shape)
28/// or a bare bool (old shape, where the map key itself was the rkey).
29#[derive(Deserialize)]
30#[serde(untagged)]
31enum RepoEntry {
32 New {
33 rkey: String,
34 #[serde(default)]
35 is_index: bool,
36 },
37 Legacy(bool),
38}
39
40impl RepoEntry {
41 fn is_index(&self) -> bool {
42 match self {
43 RepoEntry::New { is_index, .. } => *is_index,
44 RepoEntry::Legacy(b) => *b,
45 }
46 }
47
48 /// Returns the rkey, falling back to the map key (name) for the legacy
49 /// shape where the key itself was the rkey.
50 fn rkey<'a>(&'a self, name: &'a str) -> &'a str {
51 match self {
52 RepoEntry::New { rkey, .. } => rkey.as_str(),
53 RepoEntry::Legacy(_) => name,
54 }
55 }
56}
57
58impl DomainMapping {
59 /// Returns the (name, entry) pair for the index site, if any.
60 fn index_repo(&self) -> Option<(&str, &RepoEntry)> {
61 self.repos.iter().find_map(|(name, entry)| {
62 if entry.is_index() {
63 Some((name.as_str(), entry))
64 } else {
65 None
66 }
67 })
68 }
69}
70
/// Compose the R2 object key for `site_path` inside the site stored under
/// `{did}/{rkey}/`.
///
/// Leading slashes of `site_path` are dropped. When what remains is empty or
/// ends with `/` (i.e. it names a directory), `index.html` is appended;
/// otherwise the remainder is used as the object name unchanged.
fn r2_key(did: &str, rkey: &str, site_path: &str) -> String {
    let relative = site_path.trim_start_matches('/');
    let mut key = format!("{}/{}/{}", did, rkey, relative);
    if relative.is_empty() || relative.ends_with('/') {
        key.push_str("index.html");
    }
    key
}
86
/// Decides whether `path` looks like a directory that was requested without
/// its trailing slash.
///
/// A path qualifies when it does not end in `/` and its final segment
/// contains no `.` (a dot is taken to mean "file with an extension").
///
/// Examples:
/// - "/docs"     => true
/// - "/docs/"    => false
/// - "/file.txt" => false
/// - "/"         => false
fn needs_trailing_slash(path: &str) -> bool {
    match path.rsplit_once('/') {
        // Nothing after the last slash: the path already ends with one.
        Some((_, "")) => false,
        Some((_, leaf)) => !leaf.contains('.'),
        // No slash at all: the whole path is the final segment.
        None => !path.contains('.'),
    }
}
101
102/// Return the canonical URL with a trailing slash appended to the path.
103fn with_trailing_slash(url: &Url) -> String {
104 let mut url = url.clone();
105 url.set_path(&format!("{}/", url.path()));
106 url.to_string()
107}
108
109/// Fetch an object from R2, falling back to appending /index.html if the
110/// key looks like a directory (no file extension in the last segment).
111async fn fetch_from_r2(bucket: &Bucket, key: &str) -> Result<Option<Object>> {
112 if let Some(obj) = bucket.get(key).execute().await? {
113 return Ok(Some(obj));
114 }
115
116 let last_segment = key.rsplit('/').next().unwrap_or(key);
117 if !last_segment.contains('.') {
118 let index_key = format!("{}/index.html", key.trim_end_matches('/'));
119 if let Some(obj) = bucket.get(&index_key).execute().await? {
120 return Ok(Some(obj));
121 }
122 }
123
124 Ok(None)
125}
126
127/// Build a Response from an R2 Object, forwarding the content-type header.
128fn response_from_object(obj: Object) -> Result<Response> {
129 let content_type = obj
130 .http_metadata()
131 .content_type
132 .unwrap_or_else(|| "application/octet-stream".to_string());
133
134 let body = obj
135 .body()
136 .ok_or_else(|| Error::RustError("empty R2 body".into()))?;
137 let mut resp = Response::from_body(body.response_body()?)?;
138 resp.headers_mut().set("Content-Type", &content_type)?;
139 resp.headers_mut()
140 .set("Cache-Control", "public, max-age=60")?;
141 Ok(resp)
142}
143
/// Reports whether `path` starts with one of the prefixes that this worker
/// does not serve itself (the caller forwards such requests unchanged).
fn is_excluded(path: &str) -> bool {
    const PASSTHROUGH_PREFIXES: &[&str] = &["/.well-known/atproto-did"];

    for prefix in PASSTHROUGH_PREFIXES.iter() {
        if path.starts_with(*prefix) {
            return true;
        }
    }
    false
}
148
149#[event(fetch)]
150async fn fetch(req: Request, env: Env, _ctx: Context) -> Result<Response> {
151 let kv = env.kv("SITES")?;
152 let bucket = env.bucket("SITES_BUCKET")?;
153
154 // Extract host, stripping any port.
155 let host = req.headers().get("host")?.unwrap_or_default();
156 let host = host.split(':').next().unwrap_or("").to_string();
157
158 if host.is_empty() {
159 return Response::error("Bad Request: missing host", 400);
160 }
161
162 let url = req.url()?;
163 let path = url.path();
164
165 if is_excluded(path) {
166 return Fetch::Request(req).send().await;
167 }
168
169 // Canonical redirect for directory-like paths.
170 if needs_trailing_slash(path) {
171 let redirect_url = with_trailing_slash(&url);
172 return Response::redirect_with_status(redirect_url.parse()?, 308);
173 }
174
175 // Single KV lookup for the whole domain.
176 let mapping = match kv.get(&host).text().await? {
177 Some(raw) => match serde_json::from_str::<DomainMapping>(&raw) {
178 Ok(m) => m,
179 Err(_) => return Response::error("Internal Error: bad mapping", 500),
180 },
181 None => return Response::error("site not found!", 404),
182 };
183
184 let path = url.path(); // always starts with "/"
185
186 // First path segment, e.g. "my_repo" from "/my_repo/page.html"
187 let first_segment = path
188 .trim_start_matches('/')
189 .split('/')
190 .next()
191 .unwrap_or("")
192 .to_string();
193
194 // 1. sub-path site
195 // If the first path segment matches a non-index repo, serve from it.
196 if !first_segment.is_empty() {
197 if let Some(entry) = mapping.repos.get(&first_segment) {
198 if !entry.is_index() {
199 // Strip the leading "/{first_segment}" to get the intra-site path.
200 let site_path = path
201 .trim_start_matches('/')
202 .trim_start_matches(&first_segment)
203 .to_string();
204
205 let key = r2_key(&mapping.did, entry.rkey(&first_segment), &site_path);
206 return match fetch_from_r2(&bucket, &key).await? {
207 Some(obj) => response_from_object(obj),
208 None => Response::error("Not Found", 404),
209 };
210 }
211 }
212 }
213
214 // 2. index site
215 // Fall back to the repo marked as the index site, serving the full path.
216 if let Some((name, entry)) = mapping.index_repo() {
217 let key = r2_key(&mapping.did, entry.rkey(name), path);
218 return match fetch_from_r2(&bucket, &key).await? {
219 Some(obj) => response_from_object(obj),
220 None => Response::error("Not Found", 404),
221 };
222 }
223
224 Response::error("Not Found", 404)
225}