Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm
0

Configure Feed

Select the types of activity you want to include in your feed.

backfill, still sketchy

+220 -28
+2 -2
jetstream/src/lib.rs
··· 439 439 440 440 if let Some(last) = last_cursor { 441 441 if event_cursor <= *last { 442 - log::warn!("event cursor {event_cursor:?} was older than the last one: {last:?}. dropping event."); 442 + log::warn!("event cursor {event_cursor:?} was not newer than the last one: {last:?}. dropping event."); 443 443 continue; 444 444 } 445 445 } ··· 475 475 476 476 if let Some(last) = last_cursor { 477 477 if event_cursor <= *last { 478 - log::warn!("event cursor {event_cursor:?} was older than the last one: {last:?}. dropping event."); 478 + log::warn!("event cursor {event_cursor:?} was not newer than the last one: {last:?}. dropping event."); 479 479 continue; 480 480 } 481 481 }
+22 -11
ufos/src/consumer.rs
··· 8 8 use std::mem; 9 9 use std::time::Duration; 10 10 use tokio::sync::mpsc::{channel, Receiver, Sender}; 11 + use tokio::time::timeout; 11 12 12 13 use crate::error::{BatchInsertError, FirehoseEventError}; 13 14 use crate::{DeleteAccount, EventBatch, UFOsCommit}; ··· 84 85 85 86 pub async fn run(&mut self) -> anyhow::Result<()> { 86 87 loop { 87 - if let Some(event) = self.jetstream_receiver.recv().await { 88 - self.handle_event(event).await? 89 - } else { 90 - anyhow::bail!("channel closed"); 88 + match timeout(Duration::from_millis(9_000), self.jetstream_receiver.recv()).await { 89 + Err(_elapsed) => self.no_events_step().await?, 90 + Ok(Some(event)) => self.handle_event(event).await?, 91 + Ok(None) => anyhow::bail!("channel closed"), 91 92 } 92 93 } 93 94 } 94 95 96 + async fn no_events_step(&mut self) -> anyhow::Result<()> { 97 + let empty = self.current_batch.batch.is_empty(); 98 + log::info!("no events received, stepping batcher (empty? {empty})"); 99 + if !empty { 100 + self.send_current_batch_now(true, "no events step").await?; 101 + } 102 + Ok(()) 103 + } 104 + 95 105 async fn handle_event(&mut self, event: JetstreamEvent) -> anyhow::Result<()> { 96 106 if let Some(earliest) = &self.current_batch.initial_cursor { 97 107 if event.cursor.duration_since(earliest)? > Duration::from_secs_f64(MAX_BATCH_SPAN_SECS) 98 108 { 99 - self.send_current_batch_now(false).await?; 109 + self.send_current_batch_now(false, "time since event") 110 + .await?; 100 111 } 101 112 } else { 102 113 self.current_batch.initial_cursor = Some(event.cursor); ··· 126 137 if event.cursor.duration_since(earliest)?.as_secs_f64() > MIN_BATCH_SPAN_SECS 127 138 && self.batch_sender.capacity() == BATCH_QUEUE_SIZE 128 139 { 129 - self.send_current_batch_now(true).await?; 140 + self.send_current_batch_now(true, "available queue").await?; 130 141 } 131 142 } 132 143 Ok(()) ··· 141 152 ); 142 153 143 154 if let Err(BatchInsertError::BatchFull(commit)) = optimistic_res { 144 - self.send_current_batch_now(false).await?; 155 + self.send_current_batch_now(false, "handle commit").await?; 145 156 self.current_batch.batch.insert_commit_by_nsid( 146 157 &collection, 147 158 commit, ··· 157 168 158 169 async fn handle_delete_account(&mut self, did: Did, cursor: Cursor) -> anyhow::Result<()> { 159 170 if self.current_batch.batch.account_removes.len() >= MAX_ACCOUNT_REMOVES { 160 - self.send_current_batch_now(false).await?; 171 + self.send_current_batch_now(false, "delete account").await?; 161 172 } 162 173 self.current_batch 163 174 .batch ··· 168 179 169 180 // holds up all consumer progress until it can send to the channel 170 181 // use this when the current batch is too full to add more to it 171 - async fn send_current_batch_now(&mut self, small: bool) -> anyhow::Result<()> { 182 + async fn send_current_batch_now(&mut self, small: bool, referrer: &str) -> anyhow::Result<()> { 172 183 let beginning = match self.current_batch.initial_cursor.map(|c| c.elapsed()) { 173 184 None => "unknown".to_string(), 174 185 Some(Ok(t)) => format!("{:?}", t), 175 186 Some(Err(e)) => format!("+{:?}", e.duration()), 176 187 }; 177 - log::info!( 178 - "sending batch now from {beginning}, {}, queue capacity: {}", 188 + log::trace!( 189 + "sending batch now from {beginning}, {}, queue capacity: {}, referrer: {referrer}", 179 190 if small { "small" } else { "full" }, 180 191 self.batch_sender.capacity(), 181 192 );
+4 -1
ufos/src/file_consumer.rs
··· 19 19 return Err(JetstreamEventError::ReceiverClosedError.into()); 20 20 } 21 21 } 22 - Ok(()) 22 + log::info!("reached end of jsonl file, looping on noop to keep server alive."); 23 + loop { 24 + tokio::time::sleep(std::time::Duration::from_secs_f64(10.)).await; 25 + } 23 26 } 24 27 25 28 pub async fn consume(
+1
ufos/src/lib.rs
··· 230 230 endpoint: String, 231 231 started_at: u64, 232 232 latest_cursor: Option<u64>, 233 + rollup_cursor: Option<u64>, 233 234 }, 234 235 } 235 236
+160 -2
ufos/src/main.rs
··· 1 1 use clap::Parser; 2 2 use jetstream::events::Cursor; 3 3 use std::path::PathBuf; 4 + use std::time::{Duration, SystemTime}; 4 5 use ufos::consumer; 5 6 use ufos::file_consumer; 6 7 use ufos::server; ··· 8 9 use ufos::storage_fjall::FjallStorage; 9 10 use ufos::storage_mem::MemStorage; 10 11 use ufos::store_types::SketchSecretPrefix; 12 + use ufos::ConsumerInfo; 11 13 12 14 #[cfg(not(target_env = "msvc"))] 13 15 use tikv_jemallocator::Jemalloc; ··· 112 114 pause_writer: bool, 113 115 backfill: bool, 114 116 reroll: bool, 115 - read_store: impl StoreReader + 'static, 117 + read_store: impl StoreReader + 'static + Clone, 116 118 mut write_store: impl StoreWriter<B> + 'static, 117 119 cursor: Option<Cursor>, 118 120 sketch_secret: SketchSecretPrefix, 119 121 ) -> anyhow::Result<()> { 120 122 println!("starting server with storage..."); 121 - let serving = server::serve(read_store); 123 + let serving = server::serve(read_store.clone()); 122 124 123 125 if pause_writer { 124 126 log::info!("not starting jetstream or the write loop."); ··· 140 142 let rolling = write_store.background_tasks(reroll)?.run(backfill); 141 143 let storing = write_store.receive_batches(batches); 142 144 145 + let stating = do_update_stuff(read_store, backfill); 146 + 143 147 tokio::select! { 144 148 z = serving => log::warn!("serve task ended: {z:?}"), 145 149 z = rolling => log::warn!("rollup task ended: {z:?}"), 146 150 z = storing => log::warn!("storage task ended: {z:?}"), 151 + z = stating => log::warn!("status task ended: {z:?}"), 147 152 }; 148 153 149 154 println!("bye!"); 150 155 151 156 Ok(()) 152 157 } 158 + 159 + async fn do_update_stuff(read_store: impl StoreReader, actually: bool) { 160 + if !actually { 161 + loop { 162 + tokio::time::sleep(std::time::Duration::from_secs_f64(4.)).await; 163 + } 164 + } 165 + let started_at = std::time::SystemTime::now(); 166 + let mut first_cursor = None; 167 + let mut first_rollup = None; 168 + let mut last_at = std::time::SystemTime::now(); 169 + let mut last_cursor = None; 170 + let mut last_rollup = None; 171 + loop { 172 + log::info!("stat thing: sleeping"); 173 + tokio::time::sleep(std::time::Duration::from_secs_f64(4.)).await; 174 + log::info!("stat thing: slept, getting info"); 175 + match read_store.get_consumer_info().await { 176 + Err(e) => log::warn!("failed to get jetstream consumer info: {e:?}"), 177 + Ok(ConsumerInfo::Jetstream { 178 + latest_cursor, 179 + rollup_cursor, 180 + .. 181 + }) => { 182 + log::info!("stat thing: got info, reporting"); 183 + let now = std::time::SystemTime::now(); 184 + let latest_cursor = latest_cursor.map(Cursor::from_raw_u64); 185 + let rollup_cursor = rollup_cursor.map(Cursor::from_raw_u64); 186 + backfill_info( 187 + latest_cursor, 188 + rollup_cursor, 189 + last_cursor, 190 + last_rollup, 191 + last_at, 192 + first_cursor, 193 + first_rollup, 194 + started_at, 195 + now, 196 + ); 197 + first_cursor = first_cursor.or(latest_cursor); 198 + first_rollup = first_rollup.or(rollup_cursor); 199 + last_cursor = latest_cursor; 200 + last_rollup = rollup_cursor; 201 + last_at = now; 202 + } 203 + } 204 + } 205 + } 206 + 207 + fn nice_duration(dt: Duration) -> String { 208 + let secs = dt.as_secs_f64(); 209 + if secs < 1. { 210 + return format!("{:.0}ms", secs * 1000.); 211 + } 212 + if secs < 60. { 213 + return format!("{secs:.02}s"); 214 + } 215 + let mins = (secs / 60.).floor(); 216 + let rsecs = secs - (mins * 60.); 217 + if mins < 60. { 218 + return format!("{mins:.0}m{rsecs:.0}s"); 219 + } 220 + let hrs = (mins / 60.).floor(); 221 + let rmins = mins - (hrs * 60.); 222 + if hrs < 24. { 223 + return format!("{hrs:.0}h{rmins:.0}m{rsecs:.0}s"); 224 + } 225 + let days = (hrs / 24.).floor(); 226 + let rhrs = hrs - (days * 24.); 227 + format!("{days:.0}d{rhrs:.0}h{rmins:.0}m{rsecs:.0}s") 228 + } 229 + 230 + fn nice_dt_two_maybes(earlier: Option<Cursor>, later: Option<Cursor>) -> String { 231 + match (earlier, later) { 232 + (Some(earlier), Some(later)) => match later.duration_since(&earlier) { 233 + Ok(dt) => nice_duration(dt), 234 + Err(e) => { 235 + let rev_dt = e.duration(); 236 + format!("+{}", nice_duration(rev_dt)) 237 + } 238 + }, 239 + _ => "unknown".to_string(), 240 + } 241 + } 242 + 243 + #[allow(clippy::too_many_arguments)] 244 + fn backfill_info( 245 + latest_cursor: Option<Cursor>, 246 + rollup_cursor: Option<Cursor>, 247 + last_cursor: Option<Cursor>, 248 + last_rollup: Option<Cursor>, 249 + last_at: SystemTime, 250 + first_cursor: Option<Cursor>, 251 + first_rollup: Option<Cursor>, 252 + started_at: SystemTime, 253 + now: SystemTime, 254 + ) { 255 + let dt_real = now 256 + .duration_since(last_at) 257 + .unwrap_or(Duration::from_millis(1)); 258 + 259 + let dt_real_total = now 260 + .duration_since(started_at) 261 + .unwrap_or(Duration::from_millis(1)); 262 + 263 + let cursor_rate = latest_cursor 264 + .zip(last_cursor) 265 + .map(|(latest, last)| { 266 + latest 267 + .duration_since(&last) 268 + .unwrap_or(Duration::from_millis(1)) 269 + }) 270 + .map(|dtc| format!("{:.2}", dtc.as_secs_f64() / dt_real.as_secs_f64())) 271 + .unwrap_or("??".into()); 272 + 273 + let cursor_avg = latest_cursor 274 + .zip(first_cursor) 275 + .map(|(latest, first)| { 276 + latest 277 + .duration_since(&first) 278 + .unwrap_or(Duration::from_millis(1)) 279 + }) 280 + .map(|dtc| format!("{:.2}", dtc.as_secs_f64() / dt_real_total.as_secs_f64())) 281 + .unwrap_or("??".into()); 282 + 283 + let rollup_rate = rollup_cursor 284 + .zip(last_rollup) 285 + .map(|(latest, last)| { 286 + latest 287 + .duration_since(&last) 288 + .unwrap_or(Duration::from_millis(1)) 289 + }) 290 + .map(|dtc| format!("{:.2}", dtc.as_secs_f64() / dt_real.as_secs_f64())) 291 + .unwrap_or("??".into()); 292 + 293 + let rollup_avg = rollup_cursor 294 + .zip(first_rollup) 295 + .map(|(latest, first)| { 296 + latest 297 + .duration_since(&first) 298 + .unwrap_or(Duration::from_millis(1)) 299 + }) 300 + .map(|dtc| format!("{:.2}", dtc.as_secs_f64() / dt_real_total.as_secs_f64())) 301 + .unwrap_or("??".into()); 302 + 303 + log::info!( 304 + "cursor: {} behind (→{}, {cursor_rate}x, {cursor_avg}x avg). rollup: {} behind (→{}, {rollup_rate}x, {rollup_avg}x avg).", 305 + latest_cursor.map(|c| c.elapsed().map(nice_duration).unwrap_or("++".to_string())).unwrap_or("?".to_string()), 306 + nice_dt_two_maybes(last_cursor, latest_cursor), 307 + rollup_cursor.map(|c| c.elapsed().map(nice_duration).unwrap_or("++".to_string())).unwrap_or("?".to_string()), 308 + nice_dt_two_maybes(last_rollup, rollup_cursor), 309 + ); 310 + }
+2 -1
ufos/src/storage.rs
··· 53 53 54 54 fn step_rollup(&mut self) -> StorageResult<(usize, HashSet<Nsid>)>; 55 55 56 - fn trim_collection(&mut self, collection: &Nsid, limit: usize) -> StorageResult<()>; 56 + fn trim_collection(&mut self, collection: &Nsid, limit: usize) 57 + -> StorageResult<(usize, usize)>; 57 58 58 59 fn delete_account(&mut self, did: &Did) -> StorageResult<usize>; 59 60 }
+22 -9
ufos/src/storage_fjall.rs
··· 149 149 let keyspace = { 150 150 let config = Config::new(path); 151 151 152 - #[cfg(not(test))] 153 - let config = config.fsync_ms(Some(4_000)); 152 + // #[cfg(not(test))] 153 + // let config = config.fsync_ms(Some(4_000)); 154 154 155 155 config.open()? 156 156 }; ··· 360 360 get_snapshot_static_neu::<JetstreamCursorKey, JetstreamCursorValue>(&global)? 361 361 .map(|c| c.to_raw_u64()); 362 362 363 + let rollup_cursor = 364 + get_snapshot_static_neu::<NewRollupCursorKey, NewRollupCursorValue>(&global)? 365 + .map(|c| c.to_raw_u64()); 366 + 363 367 Ok(ConsumerInfo::Jetstream { 364 368 endpoint, 365 369 started_at, 366 370 latest_cursor, 371 + rollup_cursor, 367 372 }) 368 373 } 369 374 ··· 951 956 Ok((cursors_stepped, dirty_nsids)) 952 957 } 953 958 954 - fn trim_collection(&mut self, collection: &Nsid, limit: usize) -> StorageResult<()> { 959 + fn trim_collection( 960 + &mut self, 961 + collection: &Nsid, 962 + limit: usize, 963 + ) -> StorageResult<(usize, usize)> { 955 964 let mut dangling_feed_keys_cleaned = 0; 956 965 let mut records_deleted = 0; 957 966 ··· 1025 1034 1026 1035 batch.commit()?; 1027 1036 1028 - log::info!("trim_collection ({collection:?}) removed {dangling_feed_keys_cleaned} dangling feed entries and {records_deleted} records"); 1029 - Ok(()) 1037 + log::trace!("trim_collection ({collection:?}) removed {dangling_feed_keys_cleaned} dangling feed entries and {records_deleted} records"); 1038 + Ok((dangling_feed_keys_cleaned, records_deleted)) 1030 1039 } 1031 1040 1032 1041 fn delete_account(&mut self, did: &Did) -> Result<usize, StorageError> { ··· 1055 1064 let mut dirty_nsids = HashSet::new(); 1056 1065 1057 1066 let mut rollup = 1058 - tokio::time::interval(Duration::from_millis(if backfill { 1_300 } else { 140 })); 1067 + tokio::time::interval(Duration::from_millis(if backfill { 1 } else { 81 })); 1059 1068 rollup.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay); 1060 1069 1061 1070 let mut trim = ··· 1073 1082 log::trace!("rolled up {n} items ({} collections now dirty)", dirty_nsids.len()); 1074 1083 }, 1075 1084 _ = trim.tick() => { 1076 - log::info!("trimming {} nsids: {dirty_nsids:?}", dirty_nsids.len()); 1085 + let n = dirty_nsids.len(); 1086 + log::trace!("trimming {n} nsids: {dirty_nsids:?}"); 1077 1087 let t0 = Instant::now(); 1088 + let (mut total_danglers, mut total_deleted) = (0, 0); 1078 1089 for collection in &dirty_nsids { 1079 - self.0.trim_collection(collection, 512).inspect_err(|e| log::error!("trim error: {e:?}"))?; 1090 + let (danglers, deleted) = self.0.trim_collection(collection, 512).inspect_err(|e| log::error!("trim error: {e:?}"))?; 1091 + total_danglers += danglers; 1092 + total_deleted += deleted; 1080 1093 } 1081 - log::info!("finished trimming in {:?}", t0.elapsed()); 1094 + log::info!("finished trimming {n} nsids in {:?}: {total_danglers} dangling and {total_deleted} total removed.", t0.elapsed()); 1082 1095 dirty_nsids.clear(); 1083 1096 }, 1084 1097 };
+7 -2
ufos/src/storage_mem.rs
··· 441 441 get_snapshot_static_neu::<JetstreamCursorKey, JetstreamCursorValue>(&global)? 442 442 .map(|c| c.to_raw_u64()); 443 443 444 + let rollup_cursor = 445 + get_snapshot_static_neu::<NewRollupCursorKey, JetstreamCursorValue>(&global)? 446 + .map(|c| c.to_raw_u64()); 447 + 444 448 Ok(ConsumerInfo::Jetstream { 445 449 endpoint, 446 450 started_at, 447 451 latest_cursor, 452 + rollup_cursor, 448 453 }) 449 454 } 450 455 ··· 959 964 collection: &Nsid, 960 965 limit: usize, 961 966 // TODO: could add a start cursor limit to avoid iterating deleted stuff at the start (/end) 962 - ) -> StorageResult<()> { 967 + ) -> StorageResult<(usize, usize)> { 963 968 let mut dangling_feed_keys_cleaned = 0; 964 969 let mut records_deleted = 0; 965 970 ··· 1015 1020 batch.commit()?; 1016 1021 1017 1022 log::info!("trim_collection ({collection:?}) removed {dangling_feed_keys_cleaned} dangling feed entries and {records_deleted} records"); 1018 - Ok(()) 1023 + Ok((dangling_feed_keys_cleaned, records_deleted)) 1019 1024 } 1020 1025 1021 1026 fn delete_account(&mut self, did: &Did) -> Result<usize, StorageError> {