// Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm
use crate::{error::ProxyError, server::HydrationSource, Repo};
use reqwest::Client;
use serde::Deserialize;
use serde_json::{Map, Value};
use std::{collections::HashMap, time::Duration};
use url::Url;

/// The values supplied for a single query parameter.
///
/// Every variant carries a list so that scalar and repeated (array) parameters
/// share one representation; a scalar is simply a one-element list.
#[derive(Debug, Clone, PartialEq)]
pub enum ParamValue {
    /// One or more string values.
    String(Vec<String>),
    /// One or more signed 64-bit integer values.
    Int(Vec<i64>),
    /// One or more boolean values.
    Bool(Vec<bool>),
}
13pub struct Params(HashMap<String, ParamValue>);
14
15impl TryFrom<Map<String, Value>> for Params {
16 type Error = (); // TODO
17 fn try_from(val: Map<String, Value>) -> Result<Self, Self::Error> {
18 let mut out = HashMap::new();
19 for (k, v) in val {
20 match v {
21 Value::String(s) => out.insert(k, ParamValue::String(vec![s])),
22 Value::Bool(b) => out.insert(k, ParamValue::Bool(vec![b])),
23 Value::Number(n) => {
24 let Some(i) = n.as_i64() else {
25 return Err(());
26 };
27 out.insert(k, ParamValue::Int(vec![i]))
28 }
29 Value::Array(a) => {
30 let Some(first) = a.first() else {
31 continue;
32 };
33 if first.is_string() {
34 let mut vals = Vec::with_capacity(a.len());
35 for v in a {
36 let Some(v) = v.as_str() else {
37 return Err(());
38 };
39 vals.push(v.to_string());
40 }
41 out.insert(k, ParamValue::String(vals));
42 } else if first.is_i64() {
43 let mut vals = Vec::with_capacity(a.len());
44 for v in a {
45 let Some(v) = v.as_i64() else {
46 return Err(());
47 };
48 vals.push(v);
49 }
50 out.insert(k, ParamValue::Int(vals));
51 } else if first.is_boolean() {
52 let mut vals = Vec::with_capacity(a.len());
53 for v in a {
54 let Some(v) = v.as_bool() else {
55 return Err(());
56 };
57 vals.push(v);
58 }
59 out.insert(k, ParamValue::Bool(vals));
60 }
61 todo!();
62 }
63 _ => return Err(()),
64 };
65 }
66
67 Ok(Self(out))
68 }
69}
70
71#[derive(Clone)]
72pub struct Proxy {
73 repo: Repo,
74 client: Client,
75}
76
77impl Proxy {
78 pub fn new(repo: Repo) -> Self {
79 let client = Client::builder()
80 .user_agent(format!(
81 "microcosm slingshot v{} (contact: @bad-example.com)",
82 env!("CARGO_PKG_VERSION")
83 ))
84 .no_proxy()
85 .timeout(Duration::from_secs(6))
86 .build()
87 .unwrap();
88 Self { repo, client }
89 }
90
91 pub async fn proxy(
92 &self,
93 xrpc: String,
94 service: String,
95 params: Option<Map<String, Value>>,
96 ) -> Result<Value, ProxyError> {
97
98 // hackin it to start
99
100 // 1. assume did-web (TODO) and get the did doc
101 #[derive(Debug, Deserialize)]
102 struct ServiceDoc {
103 id: String,
104 service: Vec<ServiceItem>,
105 }
106 #[derive(Debug, Deserialize)]
107 struct ServiceItem {
108 id: String,
109 #[expect(unused)]
110 r#type: String,
111 #[serde(rename = "serviceEndpoint")]
112 service_endpoint: Url,
113 }
114 let dw = service.strip_prefix("did:web:").expect("a did web");
115 let (dw, service_id) = dw.split_once("#").expect("whatever");
116 let mut dw_url = Url::parse(&format!("https://{dw}"))?;
117 dw_url.set_path("/.well-known/did.json");
118 let doc: ServiceDoc = self.client
119 .get(dw_url)
120 .send()
121 .await?
122 .error_for_status()?
123 .json()
124 .await?;
125
126 assert_eq!(doc.id, format!("did:web:{}", dw));
127
128 let mut upstream = None;
129 for ServiceItem { id, service_endpoint, .. } in doc.service {
130 let Some((_, id)) = id.split_once("#") else { continue; };
131 if id != service_id { continue; };
132 upstream = Some(service_endpoint);
133 break;
134 }
135
136 // 2. proxy the request forward
137 let mut upstream = upstream.expect("to find it");
138 upstream.set_path(&format!("/xrpc/{xrpc}")); // TODO: validate nsid
139
140 if let Some(params) = params {
141 let mut query = upstream.query_pairs_mut();
142 let Params(ps) = params.try_into().expect("valid params");
143 for (k, pvs) in ps {
144 match pvs {
145 ParamValue::String(s) => {
146 for s in s {
147 query.append_pair(&k, &s);
148 }
149 }
150 ParamValue::Int(i) => {
151 for i in i {
152 query.append_pair(&k, &i.to_string());
153 }
154 }
155 ParamValue::Bool(b) => {
156 for b in b {
157 query.append_pair(&k, &b.to_string());
158 }
159 }
160 }
161 }
162 }
163
164 // TODO: other headers to proxy
165 Ok(self.client
166 .get(upstream)
167 .send()
168 .await?
169 .error_for_status()?
170 .json()
171 .await?)
172 }
173}
174
/// One segment of a parsed record path.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PathPart {
    /// A plain key, written `key`.
    Scalar(String),
    /// An array key, written `key[]` or `key[type]`: the key plus the optional
    /// text found between the brackets.
    Vector(String, Option<String>), // key, $type
}

/// Parses a dotted record path such as `facets[app.bsky.richtext.facet].features[].did`.
///
/// Segments are separated by `.`. A segment is either a bare key
/// ([`PathPart::Scalar`]) or a key followed by `[...]` ([`PathPart::Vector`]);
/// the bracket content may be empty and may itself contain dots. An empty input
/// yields an empty path.
///
/// # Errors
///
/// Returns a message (positions are character indices) for: nested or unclosed
/// brackets, a stray `]`, a bracket or dot with no key before it, a character
/// other than `.` directly after `]`, and a path that ends with a dot.
pub fn parse_record_path(input: &str) -> Result<Vec<PathPart>, String> {
    let mut out = Vec::new();

    let mut key_acc = String::new();
    let mut type_acc = String::new();
    let mut in_bracket = false;
    // true from a segment-separating dot until the next segment's key begins
    let mut after_dot = false;
    let mut chars = input.chars().enumerate();
    while let Some((i, c)) = chars.next() {
        match c {
            '[' if in_bracket => {
                return Err(format!("nested opening bracket not allowed, at {i}"));
            }
            '[' if key_acc.is_empty() => {
                return Err(format!("missing key before opening bracket, at {i}"));
            }
            '[' => in_bracket = true,
            ']' if in_bracket => {
                in_bracket = false;
                let key = std::mem::take(&mut key_acc);
                let r#type = std::mem::take(&mut type_acc);
                let t = if r#type.is_empty() { None } else { Some(r#type) };
                out.push(PathPart::Vector(key, t));
                // consume the next char here: after an array segment only a dot
                // (or the end of input) is allowed, and this avoids extra loop state
                let Some((i, c)) = chars.next() else {
                    break;
                };
                if c != '.' {
                    return Err(format!(
                        "expected dot after close bracket, found {c:?} at {i}"
                    ));
                }
                after_dot = true;
            }
            ']' => return Err(format!("unexpected close bracket at {i}")),
            // dots inside brackets belong to the type name
            '.' if in_bracket => type_acc.push(c),
            '.' if key_acc.is_empty() => {
                return Err(format!("missing key before next segment, at {i}"));
            }
            '.' => {
                let key = std::mem::take(&mut key_acc);
                assert!(type_acc.is_empty());
                out.push(PathPart::Scalar(key));
                after_dot = true;
            }
            _ if in_bracket => type_acc.push(c),
            _ => {
                key_acc.push(c);
                after_dot = false;
            }
        }
    }
    if in_bracket {
        return Err("unclosed bracket".into());
    }
    if after_dot {
        // a separator must be followed by another segment
        return Err("missing key after trailing dot".into());
    }
    if !key_acc.is_empty() {
        out.push(PathPart::Scalar(key_acc));
    }
    Ok(out)
}

/// The shape a reference is expected to have at the end of a record path.
///
/// Parsed from its kebab-case name via `TryFrom<&str>`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RefShape {
    /// `"strong-ref"`
    StrongRef,
    /// `"at-uri"`
    AtUri,
    /// `"at-uri-parts"`
    AtUriParts,
    /// `"did"`
    Did,
    /// `"handle"`
    Handle,
    /// `"at-identifier"`
    AtIdentifier,
    /// `"blob"`
    Blob,
    // TODO: blob with type?
}

impl TryFrom<&str> for RefShape {
    type Error = String;

    /// Maps a shape name to its variant; any other name yields
    /// `Err("unknown shape: <name>")`.
    fn try_from(s: &str) -> Result<Self, Self::Error> {
        match s {
            "strong-ref" => Ok(Self::StrongRef),
            "at-uri" => Ok(Self::AtUri),
            "at-uri-parts" => Ok(Self::AtUriParts),
            "did" => Ok(Self::Did),
            "handle" => Ok(Self::Handle),
            "at-identifier" => Ok(Self::AtIdentifier),
            "blob" => Ok(Self::Blob),
            _ => Err(format!("unknown shape: {s}")),
        }
    }
}

/// A reference extracted from a JSON value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MatchedRef {
    /// A record reference, optionally carrying a CID.
    AtUri {
        uri: String,
        cid: Option<String>,
    },
    /// An account identifier string.
    Identifier(String),
    /// A blob reference: its link, MIME type, and size.
    Blob {
        link: String,
        mime: String,
        size: u64,
    },
}

270pub fn match_shape(shape: &RefShape, val: &Value) -> Option<MatchedRef> {
271 // TODO: actually validate at-uri format
272 // TODO: actually validate everything else also
273 // TODO: should this function normalize identifiers to DIDs probably?
274 // or just return at-uri parts so the caller can resolve and reassemble
275 match shape {
276 RefShape::StrongRef => {
277 let o = val.as_object()?;
278 let uri = o.get("uri")?.as_str()?.to_string();
279 let cid = o.get("cid")?.as_str()?.to_string();
280 Some(MatchedRef::AtUri { uri, cid: Some(cid) })
281 }
282 RefShape::AtUri => {
283 let uri = val.as_str()?.to_string();
284 Some(MatchedRef::AtUri { uri, cid: None })
285 }
286 RefShape::AtUriParts => {
287 let o = val.as_object()?;
288 let identifier = o.get("repo").or(o.get("did"))?.as_str()?.to_string();
289 let collection = o.get("collection")?.as_str()?.to_string();
290 let rkey = o.get("rkey")?.as_str()?.to_string();
291 let uri = format!("at://{identifier}/{collection}/{rkey}");
292 let cid = o.get("cid").and_then(|v| v.as_str()).map(str::to_string);
293 Some(MatchedRef::AtUri { uri, cid })
294 }
295 RefShape::Did => {
296 let id = val.as_str()?;
297 if !id.starts_with("did:") {
298 return None;
299 }
300 Some(MatchedRef::Identifier(id.to_string()))
301 }
302 RefShape::Handle => {
303 let id = val.as_str()?;
304 if id.contains(':') {
305 return None;
306 }
307 Some(MatchedRef::Identifier(id.to_string()))
308 }
309 RefShape::AtIdentifier => {
310 Some(MatchedRef::Identifier(val.as_str()?.to_string()))
311 }
312 RefShape::Blob => {
313 let o = val.as_object()?;
314 if o.get("$type")? != "blob" {
315 return None;
316 }
317 let link = o.get("ref")?.as_object()?.get("$link")?.as_str()?.to_string();
318 let mime = o.get("mimeType")?.as_str()?.to_string();
319 let size = o.get("size")?.as_u64()?;
320 Some(MatchedRef::Blob { link, mime, size })
321 }
322 }
323}
324
325// TODO: send back metadata about the matching
326pub fn extract_links(
327 sources: Vec<HydrationSource>,
328 skeleton: &Value,
329) -> Result<Vec<MatchedRef>, String> {
330 // collect early to catch errors from the client
331 // (TODO maybe the handler should do this and pass in the processed stuff probably definitely yeah)
332 let sources = sources
333 .into_iter()
334 .map(|HydrationSource { path, shape }| {
335 let path_parts = parse_record_path(&path)?;
336 let shape: RefShape = shape.as_str().try_into()?;
337 Ok((path_parts, shape))
338 })
339 .collect::<Result<Vec<_>, String>>()?;
340
341 // lazy first impl, just re-walk the skeleton as many times as needed
342 // not deduplicating for now
343 let mut out = Vec::new();
344 for (path_parts, shape) in sources {
345 for val in PathWalker::new(&path_parts, skeleton) {
346 if let Some(matched) = match_shape(&shape, val) {
347 out.push(matched);
348 }
349 }
350 }
351
352 Ok(out)
353}
354
355struct PathWalker<'a> {
356 todo: Vec<(&'a [PathPart], &'a Value)>,
357}
358impl<'a> PathWalker<'a> {
359 fn new(path_parts: &'a [PathPart], skeleton: &'a Value) -> Self {
360 Self { todo: vec![(path_parts, skeleton)] }
361 }
362}
363impl<'a> Iterator for PathWalker<'a> {
364 type Item = &'a Value;
365 fn next(&mut self) -> Option<Self::Item> {
366 loop {
367 let (parts, val) = self.todo.pop()?;
368 let Some((part, rest)) = parts.split_first() else {
369 return Some(val);
370 };
371 let Some(o) = val.as_object() else {
372 continue;
373 };
374 match part {
375 PathPart::Scalar(k) => {
376 let Some(v) = o.get(k) else {
377 continue;
378 };
379 self.todo.push((rest, v));
380 }
381 PathPart::Vector(k, t) => {
382 let Some(a) = o.get(k).and_then(|v| v.as_array()) else {
383 continue;
384 };
385 for v in a
386 .iter()
387 .rev()
388 .filter(|c| {
389 let Some(t) = t else { return true };
390 c
391 .as_object()
392 .and_then(|o| o.get("$type"))
393 .and_then(|v| v.as_str())
394 .map(|s| s == t)
395 .unwrap_or(false)
396 })
397 {
398 self.todo.push((rest, v))
399 }
400 }
401 }
402 }
403 }
404}
405
406
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Valid paths parse into the expected segments.
    #[test]
    fn test_parse_record_path() -> Result<(), Box<dyn std::error::Error>> {
        let cases = [
            ("", vec![]),
            ("subject", vec![PathPart::Scalar("subject".into())]),
            ("authorDid", vec![PathPart::Scalar("authorDid".into())]),
            (
                "subject.uri",
                vec![
                    PathPart::Scalar("subject".into()),
                    PathPart::Scalar("uri".into()),
                ],
            ),
            ("members[]", vec![PathPart::Vector("members".into(), None)]),
            (
                "add[].key",
                vec![
                    PathPart::Vector("add".into(), None),
                    PathPart::Scalar("key".into()),
                ],
            ),
            ("a[b]", vec![PathPart::Vector("a".into(), Some("b".into()))]),
            (
                "a[b.c]",
                vec![PathPart::Vector("a".into(), Some("b.c".into()))],
            ),
            (
                "facets[app.bsky.richtext.facet].features[app.bsky.richtext.facet#mention].did",
                vec![
                    PathPart::Vector("facets".into(), Some("app.bsky.richtext.facet".into())),
                    PathPart::Vector(
                        "features".into(),
                        Some("app.bsky.richtext.facet#mention".into()),
                    ),
                    PathPart::Scalar("did".into()),
                ],
            ),
        ];

        for (path, expected) in cases {
            let parsed = parse_record_path(path)?;
            assert_eq!(parsed, expected, "path: {path:?}");
        }

        Ok(())
    }

    /// Structurally broken paths are rejected.
    #[test]
    fn test_parse_record_path_errors() {
        let cases = ["[", "a[", "a]", ".a", "a..b", "a[b]c", "a[[b]]"];
        for path in cases {
            assert!(parse_record_path(path).is_err(), "path: {path:?}");
        }
    }

    /// Each shape accepts only values with the structure it requires.
    #[test]
    fn test_match_shape() {
        let cases = [
            ("strong-ref", json!(""), None),
            ("strong-ref", json!({}), None),
            ("strong-ref", json!({ "uri": "abc" }), None),
            ("strong-ref", json!({ "cid": "def" }), None),
            (
                "strong-ref",
                json!({ "uri": "abc", "cid": "def" }),
                Some(MatchedRef::AtUri {
                    uri: "abc".to_string(),
                    cid: Some("def".to_string()),
                }),
            ),
            ("at-uri", json!({ "uri": "abc" }), None),
            ("at-uri", json!({ "uri": "abc", "cid": "def" }), None),
            (
                "at-uri",
                json!("abc"),
                Some(MatchedRef::AtUri {
                    uri: "abc".to_string(),
                    cid: None,
                }),
            ),
            ("at-uri-parts", json!("abc"), None),
            ("at-uri-parts", json!({}), None),
            (
                "at-uri-parts",
                json!({"repo": "a", "collection": "b", "rkey": "c"}),
                Some(MatchedRef::AtUri {
                    uri: "at://a/b/c".to_string(),
                    cid: None,
                }),
            ),
            (
                "at-uri-parts",
                json!({"did": "a", "collection": "b", "rkey": "c"}),
                Some(MatchedRef::AtUri {
                    uri: "at://a/b/c".to_string(),
                    cid: None,
                }),
            ),
            (
                "at-uri-parts",
                // 'repo' takes precedence over 'did'
                json!({"did": "a", "repo": "z", "collection": "b", "rkey": "c"}),
                Some(MatchedRef::AtUri {
                    uri: "at://z/b/c".to_string(),
                    cid: None,
                }),
            ),
            (
                "at-uri-parts",
                json!({"repo": "a", "collection": "b", "rkey": "c", "cid": "def"}),
                Some(MatchedRef::AtUri {
                    uri: "at://a/b/c".to_string(),
                    cid: Some("def".to_string()),
                }),
            ),
            (
                "at-uri-parts",
                json!({"repo": "a", "collection": "b", "rkey": "c", "cid": {}}),
                Some(MatchedRef::AtUri {
                    uri: "at://a/b/c".to_string(),
                    cid: None,
                }),
            ),
            ("did", json!({}), None),
            ("did", json!(""), None),
            ("did", json!("bad-example.com"), None),
            (
                "did",
                json!("did:plc:xyz"),
                Some(MatchedRef::Identifier("did:plc:xyz".to_string())),
            ),
            ("handle", json!({}), None),
            (
                "handle",
                json!("bad-example.com"),
                Some(MatchedRef::Identifier("bad-example.com".to_string())),
            ),
            ("handle", json!("did:plc:xyz"), None),
            ("at-identifier", json!({}), None),
            (
                "at-identifier",
                json!("bad-example.com"),
                Some(MatchedRef::Identifier("bad-example.com".to_string())),
            ),
            (
                "at-identifier",
                json!("did:plc:xyz"),
                Some(MatchedRef::Identifier("did:plc:xyz".to_string())),
            ),
            ("blob", json!("abc"), None),
            ("blob", json!({}), None),
            (
                "blob",
                json!({
                    "$type": "not-blob",
                    "ref": { "$link": "bafy" },
                    "mimeType": "image/png",
                    "size": 12
                }),
                None,
            ),
            (
                "blob",
                json!({ "$type": "blob", "ref": { "$link": "bafy" }, "mimeType": "image/png" }),
                None,
            ),
            (
                "blob",
                json!({
                    "$type": "blob",
                    "ref": { "$link": "bafy" },
                    "mimeType": "image/png",
                    "size": 12
                }),
                Some(MatchedRef::Blob {
                    link: "bafy".to_string(),
                    mime: "image/png".to_string(),
                    size: 12,
                }),
            ),
        ];
        for (shape, val, expected) in cases {
            let s = shape.try_into().unwrap();
            let matched = match_shape(&s, &val);
            assert_eq!(matched, expected, "shape: {shape:?}, val: {val:?}");
        }
    }
}
505}