Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm
0

Configure Feed

Select the types of activity you want to include in your feed.

🤷‍♀️ still deadlocking

+141 -101
+1 -1
jetstream/src/lib.rs
··· 394 394 log::error!("Jetstream connection closed cleanly"); 395 395 } 396 396 if t_connected.elapsed() > Duration::from_secs(success_threshold_s) { 397 - retry_attempt = 1; 397 + retry_attempt = 0; 398 398 } 399 399 } 400 400
+9 -2
ufos/src/main.rs
··· 28 28 /// Location to store persist data to disk 29 29 #[arg(long)] 30 30 data: PathBuf, 31 + /// DEBUG: force the rw loop to fall behind by pausing it 32 + #[arg(long, action)] 33 + pause_rw: bool, 31 34 } 32 35 33 36 // #[tokio::main(flavor = "current_thread")] // TODO: move this to config via args ··· 62 65 }); 63 66 64 67 let t3 = tokio::task::spawn(async move { 65 - let r = storage.rw_loop().await; 66 - log::warn!("storage.rw_loop ended with: {r:?}"); 68 + if !args.pause_rw { 69 + let r = storage.rw_loop().await; 70 + log::warn!("storage.rw_loop ended with: {r:?}"); 71 + } else { 72 + log::info!("not starting rw loop."); 73 + } 67 74 }); 68 75 69 76 // tokio::select! {
+131 -98
ufos/src/store.rs
··· 17 17 use std::path::{Path, PathBuf}; 18 18 use std::time::{Duration, Instant}; 19 19 use tokio::time::sleep; 20 - use tokio::sync::{mpsc::Receiver, Mutex}; 20 + use tokio::sync::{mpsc::Receiver}; 21 21 22 22 /// Commit the RW batch immediately if this number of events have been read off the mod queue 23 - const MAX_BATCHED_RW_EVENTS: usize = 16; 23 + const MAX_BATCHED_RW_EVENTS: usize = 18; 24 24 25 25 /// Commit the RW batch immediately if this number of records is reached 26 26 /// ··· 29 29 /// - doing more work whenever scheduled means getting more CPU time in general 30 30 /// 31 31 /// this is higher than [MAX_BATCHED_RW_EVENTS] because account-deletes can have lots of items 32 - const MAX_BATCHED_RW_ITEMS: usize = 48; 32 + const MAX_BATCHED_RW_ITEMS: usize = 24; 33 33 34 34 35 35 #[derive(Clone)] ··· 38 38 partition: PartitionHandle, 39 39 } 40 40 41 - // struct FakeMutex<T> { 42 - // thing: T, 43 - // } 44 - // impl<T: Clone> FakeMutex<T> { 45 - // pub fn new(thing: T) -> Self { 46 - // Self { thing } 47 - // } 48 - // pub async fn lock(&self) -> T { 49 - // self.thing.clone() 50 - // } 51 - // } 41 + struct FakeMutex<T> { 42 + thing: T, 43 + } 44 + impl<T: Clone> FakeMutex<T> { 45 + pub fn new(thing: T) -> Self { 46 + Self { thing } 47 + } 48 + pub async fn lock(&self) -> T { 49 + self.thing.clone() 50 + } 51 + } 52 52 53 53 /** 54 54 * data format, roughly: ··· 78 78 #[derive(Clone)] 79 79 pub struct Storage { 80 80 /// horrible: gate all db access behind this to force global serialization to avoid deadlock 81 - db: Arc<Mutex<SerialDb>>, 81 + db: Arc<FakeMutex<SerialDb>>, 82 82 } 83 83 84 84 impl Storage { ··· 89 89 PartitionCreateOptions::default().compression(CompressionType::None), 90 90 )?; 91 91 Ok(Self { 92 - db: Arc::new(Mutex::new(SerialDb { keyspace, partition })), 92 + db: Arc::new(FakeMutex::new(SerialDb { keyspace, partition })), 93 93 }) 94 94 } 95 95 ··· 168 168 pub async fn rw_loop(&self) -> anyhow::Result<()> { 169 169 // TODO: lock so that only one rw loop can possibly be run. or even better, take a mutable resource thing to enforce at compile time. 170 170 loop { 171 - sleep(Duration::from_secs_f64(0.15)).await; // todo: interval rate-limit instead 171 + sleep(Duration::from_secs_f64(0.1)).await; // todo: interval rate-limit instead 172 172 173 173 let db = self.db.lock().await; 174 174 let keyspace = db.keyspace.clone(); ··· 186 186 let mut any_tasks_found = false; 187 187 188 188 log::trace!("rw: iterating newer rw items..."); 189 - for (i, pair) in partition.range(range.clone()).enumerate() { 190 - log::trace!("rw: iterating {i}"); 191 - any_tasks_found = true; 192 189 193 - if i >= MAX_BATCHED_RW_EVENTS { 194 - break; 195 - } 196 190 197 - let (key_bytes, val_bytes) = pair?; 198 - let mod_key = match db_complete::<ModQueueItemKey>(&key_bytes) { 199 - Ok(k) => k, 200 - Err(EncodingError::WrongStaticPrefix(_, _)) => { 201 - panic!("wsp: mod queue empty."); 191 + //// ITER 192 + 193 + { 194 + let iterator = partition.range(range.clone()).enumerate().into_iter(); 195 + 196 + for (i, pair) in iterator { 197 + log::trace!("rw: iterating {i}"); 198 + any_tasks_found = true; 199 + 200 + if i >= MAX_BATCHED_RW_EVENTS { 201 + break; 202 202 } 203 - otherwise => otherwise?, 204 - }; 203 + 204 + let (key_bytes, val_bytes) = pair?; 205 + let mod_key = match db_complete::<ModQueueItemKey>(&key_bytes) { 206 + Ok(k) => k, 207 + Err(EncodingError::WrongStaticPrefix(_, _)) => { 208 + panic!("wsp: mod queue empty."); 209 + } 210 + otherwise => otherwise?, 211 + }; 205 212 206 - let mod_value: ModQueueItemValue = 207 - db_complete::<ModQueueItemStringValue>(&val_bytes)?.try_into()?; 213 + let mod_value: ModQueueItemValue = 214 + db_complete::<ModQueueItemStringValue>(&val_bytes)?.try_into()?; 208 215 209 - log::trace!("rw: iterating {i}: sending to batcher {mod_key:?} => {mod_value:?}"); 210 - batched_rw_items += DBWriter { 211 - keyspace: keyspace.clone(), 212 - partition: partition.clone(), 213 - } 214 - .write_rw(&mut db_batch, mod_key, mod_value)?; 215 - log::trace!("rw: iterating {i}: back from batcher."); 216 + log::trace!("rw: iterating {i}: sending to batcher {mod_key:?} => {mod_value:?}"); 217 + batched_rw_items += DBWriter { 218 + keyspace: keyspace.clone(), 219 + partition: partition.clone(), 220 + } 221 + .write_rw(&mut db_batch, mod_key, mod_value)?; 222 + log::trace!("rw: iterating {i}: back from batcher."); 216 223 217 - if batched_rw_items >= MAX_BATCHED_RW_ITEMS { 218 - log::trace!("rw: iterating {i}: batch big enough, breaking out."); 219 - break; 224 + if batched_rw_items >= MAX_BATCHED_RW_ITEMS { 225 + log::trace!("rw: iterating {i}: batch big enough, breaking out."); 226 + break; 227 + } 220 228 } 229 + // drop(iterator); // moved -- must be dropped hopefully 221 230 } 222 231 223 232 if !any_tasks_found { ··· 246 255 let prefix = ByCollectionKey::prefix_from_collection(collection.clone())?; 247 256 tokio::task::spawn_blocking(move || { 248 257 let mut output = Vec::new(); 249 - for pair in partition.prefix(&prefix).rev().take(limit) { 250 - let (k_bytes, v_bytes) = pair?; 251 - let (_, cursor) = db_complete::<ByCollectionKey>(&k_bytes)?.into(); 252 - let (did, rkey, record) = db_complete::<ByCollectionValue>(&v_bytes)?.into(); 253 - output.push(CreateRecord { 254 - did, 255 - rkey, 256 - record, 257 - cursor, 258 - }) 258 + 259 + 260 + ////// ITER 261 + { 262 + for pair in partition.prefix(&prefix).rev().take(limit) { 263 + let (k_bytes, v_bytes) = pair?; 264 + let (_, cursor) = db_complete::<ByCollectionKey>(&k_bytes)?.into(); 265 + let (did, rkey, record) = db_complete::<ByCollectionValue>(&v_bytes)?.into(); 266 + output.push(CreateRecord { 267 + did, 268 + rkey, 269 + record, 270 + cursor, 271 + }) 272 + } 259 273 } 260 274 Ok(output) 261 275 }) ··· 375 389 376 390 let mut scanned = 0; 377 391 let mut rolled = 0; 378 - for pair in partition.range(range) { 379 - let (key_bytes, value_bytes) = pair?; 380 - let key = db_complete::<ByCursorSeenKey>(&key_bytes)?; 381 - let val = db_complete::<ByCursorSeenValue>(&value_bytes)?; 392 + 393 + 394 + ////// ITER 395 + 396 + { 397 + for pair in partition.range(range) { 398 + let (key_bytes, value_bytes) = pair?; 399 + let key = db_complete::<ByCursorSeenKey>(&key_bytes)?; 400 + let val = db_complete::<ByCursorSeenValue>(&value_bytes)?; 382 401 383 - if *key.collection() == collection { 384 - let SeenCounter(n) = val; 385 - collection_total += n; 386 - rolled += 1; 402 + if *key.collection() == collection { 403 + let SeenCounter(n) = val; 404 + collection_total += n; 405 + rolled += 1; 406 + } 407 + scanned += 1; 387 408 } 388 - scanned += 1; 389 409 } 390 410 391 411 eprintln!("scanned: {scanned}, rolled: {rolled}"); ··· 491 511 let mut items_removed = 0; 492 512 493 513 log::trace!("delete_record: iterate over up to current cursor..."); 494 - for (i, pair) in self.partition.range(key_prefix_bytes..key_limit).enumerate() { 495 - log::trace!("delete_record iter {i}: found"); 496 - // find all (hopefully 1) 497 - let (key_bytes, _) = pair?; 498 - let key = db_complete::<ByIdKey>(&key_bytes)?; 499 - let found_cursor = key.cursor(); 500 - if found_cursor > cursor { 501 - // we are *only* allowed to delete records that came before the record delete event 502 - // log::trace!("delete_record: found (and ignoring) newer version(s). key: {key:?}"); 503 - panic!("wtf, found newer version than cursor limit we tried to set."); 504 - // break; 505 - } 506 514 507 - // remove the by_id entry 508 - db_batch.remove(&self.partition, key_bytes); 509 515 510 - // remove its record sample 511 - let by_collection_key_bytes = 512 - ByCollectionKey::new(collection.clone(), found_cursor).to_db_bytes()?; 513 - db_batch.remove(&self.partition, by_collection_key_bytes); 516 + ////////// ITER 514 517 515 - items_removed += 1; 518 + { 519 + for (i, pair) in self.partition.range(key_prefix_bytes..key_limit).enumerate() { 520 + log::trace!("delete_record iter {i}: found"); 521 + // find all (hopefully 1) 522 + let (key_bytes, _) = pair?; 523 + let key = db_complete::<ByIdKey>(&key_bytes)?; 524 + let found_cursor = key.cursor(); 525 + if found_cursor > cursor { 526 + // we are *only* allowed to delete records that came before the record delete event 527 + // log::trace!("delete_record: found (and ignoring) newer version(s). key: {key:?}"); 528 + panic!("wtf, found newer version than cursor limit we tried to set."); 529 + // break; 530 + } 531 + 532 + // remove the by_id entry 533 + db_batch.remove(&self.partition, key_bytes); 534 + 535 + // remove its record sample 536 + let by_collection_key_bytes = 537 + ByCollectionKey::new(collection.clone(), found_cursor).to_db_bytes()?; 538 + db_batch.remove(&self.partition, by_collection_key_bytes); 539 + 540 + items_removed += 1; 541 + } 516 542 } 517 543 518 544 // if items_removed > 1 { ··· 541 567 let key_prefix_bytes = ByIdKey::did_prefix(did).to_db_bytes()?; 542 568 543 569 let mut items_added = 0; 544 - for pair in self.partition.prefix(&key_prefix_bytes) { 545 - let (key_bytes, _) = pair?; 570 + 571 + 572 + 573 + ////////// ITER 574 + 575 + { 576 + for pair in self.partition.prefix(&key_prefix_bytes) { 577 + let (key_bytes, _) = pair?; 546 578 547 - let (_, collection, _rkey, found_cursor) = db_complete::<ByIdKey>(&key_bytes)?.into(); 548 - if found_cursor > cursor { 549 - log::trace!( 550 - "delete account: found (and ignoring) newer records than the delete event??" 551 - ); 552 - continue; 553 - } 579 + let (_, collection, _rkey, found_cursor) = db_complete::<ByIdKey>(&key_bytes)?.into(); 580 + if found_cursor > cursor { 581 + log::trace!( 582 + "delete account: found (and ignoring) newer records than the delete event??" 583 + ); 584 + continue; 585 + } 554 586 555 - // remove the by_id entry 556 - db_batch.remove(&self.partition, key_bytes); 587 + // remove the by_id entry 588 + db_batch.remove(&self.partition, key_bytes); 557 589 558 - // remove its record sample 559 - let by_collection_key_bytes = 560 - ByCollectionKey::new(collection, found_cursor).to_db_bytes()?; 561 - db_batch.remove(&self.partition, by_collection_key_bytes); 590 + // remove its record sample 591 + let by_collection_key_bytes = 592 + ByCollectionKey::new(collection, found_cursor).to_db_bytes()?; 593 + db_batch.remove(&self.partition, by_collection_key_bytes); 562 594 563 - items_added += 1; 564 - if items_added >= MAX_BATCHED_RW_ITEMS { 565 - return Ok((items_added, false)); // there might be more records but we've done enough for this batch 595 + items_added += 1; 596 + if items_added >= MAX_BATCHED_RW_ITEMS { 597 + return Ok((items_added, false)); // there might be more records but we've done enough for this batch 598 + } 566 599 } 567 600 } 568 601