Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm
0

Configure Feed

Select the types of activity you want to include in your feed.

just do major compact

and fix the default-run config

+43 -109
+5
ufos/Cargo.toml
··· 2 2 name = "ufos" 3 3 version = "0.1.0" 4 4 edition = "2021" 5 + default-run = "ufos" 5 6 6 7 [dependencies] 7 8 anyhow = "1.0.97" ··· 37 38 [[bin]] 38 39 name = "analyze" 39 40 path = "src/bin/analyze.rs" 41 + 42 + [[bin]] 43 + name = "major-compact" 44 + path = "src/bin/major-compact.rs" 40 45 41 46 [dev-dependencies] 42 47 tempfile = "3.19.1"
+34
ufos/src/bin/major-compact.rs
··· 1 + use clap::Parser; 2 + use fjall::{Config, PartitionCreateOptions}; 3 + use std::path::PathBuf; 4 + use std::time::Instant; 5 + 6 + #[derive(Parser)] 7 + #[command(about = "Run a major compaction over every ufos partition")] 8 + struct Cli { 9 + /// path to the fjall data directory 10 + /// 11 + /// WARNING: MUST NOT RUN WHILE ANOTHER UFOS PROCESS IS USING IT 12 + data: PathBuf, 13 + } 14 + 15 + fn main() -> anyhow::Result<()> { 16 + let cli = Cli::parse(); 17 + 18 + eprintln!("opening db at {:?}...", cli.data); 19 + let keyspace = Config::new(&cli.data).open()?; 20 + 21 + for name in ["global", "feeds", "records", "rollups", "queues"] { 22 + let partition = keyspace.open_partition(name, PartitionCreateOptions::default())?; 23 + let size0 = partition.disk_space(); 24 + eprintln!("beginning major compaction for {name} (original size: {size0})"); 25 + let t0 = Instant::now(); 26 + partition.major_compact()?; 27 + let dt = t0.elapsed(); 28 + let sizef = partition.disk_space(); 29 + let dsize = (sizef as i64) - (size0 as i64); 30 + eprintln!("completed compaction for {name} in {dt:?} (new size: {sizef}, {dsize})"); 31 + } 32 + 33 + Ok(()) 34 + }
+2 -22
ufos/src/main.rs
··· 9 9 use ufos::file_consumer; 10 10 use ufos::server; 11 11 use ufos::storage::{StorageWhatever, StoreBackground, StoreReader, StoreWriter}; 12 - use ufos::storage_fjall::{FjallConfig, FjallStorage}; 12 + use ufos::storage_fjall::FjallStorage; 13 13 use ufos::store_types::SketchSecretPrefix; 14 14 use ufos::{nice_duration, ConsumerInfo}; 15 15 ··· 59 59 /// DEBUG: interpret jetstream as a file fixture 60 60 #[arg(long, action, env = "UFOS_JETSTREAM_FIXTURE")] 61 61 jetstream_fixture: bool, 62 - /// HOPEFULLY only needed once 63 - /// 64 - /// brute-force garbage-collect all dangling records because we weren't deleting 65 - /// them before at all (oops) 66 - #[arg(long, action)] 67 - fjall_records_gc: bool, 68 62 /// enable metrics collection and serving 69 63 #[arg(long, action, env = "UFOS_COLLECT_METRICS")] 70 64 collect_metrics: bool, ··· 84 78 args.data.clone(), 85 79 jetstream, 86 80 args.jetstream_force, 87 - FjallConfig { 88 - major_compact: !args.fjall_records_gc, 89 - }, 81 + Default::default(), 90 82 )?; 91 - 92 - if args.fjall_records_gc { 93 - log::info!("beginning brute-force records gc"); 94 - let t0 = std::time::Instant::now(); 95 - let (n, m) = write_store.records_brute_gc_danger()?; 96 - let dt = t0.elapsed(); 97 - log::info!( 98 - "completed brute-force records gc in {dt:?}, removed {n} and retained {m} records." 99 - ); 100 - return Ok(()); 101 - } 102 - 103 83 go(args, read_store, write_store, cursor, sketch_secret).await?; 104 84 Ok(()) 105 85 }
+2 -87
ufos/src/storage_fjall.rs
··· 148 148 /// this is only meant for tests 149 149 #[cfg(test)] 150 150 pub temp: bool, 151 - /// do major compaction on startup 152 - /// 153 - /// default is false. probably a good thing unless it's too slow. 154 - pub major_compact: bool, 155 151 } 156 152 157 153 impl StorageWhatever<FjallReader, FjallWriter, FjallBackground, FjallConfig> for FjallStorage { ··· 159 155 path: impl AsRef<Path>, 160 156 endpoint: String, 161 157 force_endpoint: bool, 162 - config: FjallConfig, 158 + _config: FjallConfig, 163 159 ) -> StorageResult<(FjallReader, FjallWriter, Option<Cursor>, SketchSecretPrefix)> { 164 160 let keyspace = { 165 161 let config = Config::new(path); ··· 227 223 228 224 sketch_secret 229 225 }; 230 - 231 - if config.major_compact { 232 - for (partition, name) in [ 233 - (&global, "global"), 234 - (&feeds, "feeds"), 235 - (&records, "records"), 236 - (&rollups, "rollups"), 237 - (&queues, "queues"), 238 - ] { 239 - let size0 = partition.disk_space(); 240 - log::info!("beggining major compaction for {name} (original size: {size0})"); 241 - let t0 = Instant::now(); 242 - partition.major_compact().expect("compact better work 😬"); 243 - let dt = t0.elapsed(); 244 - let sizef = partition.disk_space(); 245 - let dsize = (sizef as i64) - (size0 as i64); 246 - log::info!( 247 - "completed compaction for {name} in {dt:?} (new size: {sizef}, {dsize})" 248 - ); 249 - } 250 - } else { 251 - log::info!("skipping major compaction on startup"); 252 - } 253 226 254 227 let reader = FjallReader { 255 228 keyspace: keyspace.clone(), ··· 1381 1354 batch.commit()?; 1382 1355 Ok((cursors_advanced, dirty_nsids)) 1383 1356 } 1384 - pub fn records_brute_gc_danger(&self) -> StorageResult<(usize, usize)> { 1385 - let (mut removed, mut retained) = (0, 0); 1386 - let mut to_retain = HashSet::<Vec<u8>>::new(); 1387 - 1388 - // Partition: 'feed' 1389 - // 1390 - // - Per-collection list of record references ordered by jetstream cursor 1391 - // - key: nullstr || u64 (collection nsid null-terminated, jetstream cursor) 1392 - // - val: nullstr || nullstr || nullstr (did, rkey, rev. rev is mostly a sanity-check for now.) 1393 - // 1394 - // 1395 - // Partition: 'records' 1396 - // 1397 - // - Actual records by their atproto location 1398 - // - key: nullstr || nullstr || nullstr (did, collection, rkey) 1399 - // - val: u64 || bool || nullstr || rawval (js_cursor, is_update, rev, actual record) 1400 - // 1401 - // 1402 - 1403 - log::warn!("loading *all* record keys from feed into memory (yikes)"); 1404 - let t0 = Instant::now(); 1405 - for (i, kv) in self.feeds.iter().enumerate() { 1406 - if i > 0 && (i % 10_000_000 == 0) { 1407 - log::info!("{i}..."); 1408 - } 1409 - let (key_bytes, val_bytes) = kv?; 1410 - let key = db_complete::<NsidRecordFeedKey>(&key_bytes)?; 1411 - let val = db_complete::<NsidRecordFeedVal>(&val_bytes)?; 1412 - let record_key: RecordLocationKey = (&key, &val).into(); 1413 - to_retain.insert(record_key.to_db_bytes()?); 1414 - } 1415 - log::warn!( 1416 - "loaded. wow. took {:?}, found {} keys", 1417 - t0.elapsed(), 1418 - to_retain.len() 1419 - ); 1420 - 1421 - log::warn!("warmup OVER, iterating some billions of record keys now"); 1422 - let t0 = Instant::now(); 1423 - for (i, k) in self.records.keys().enumerate() { 1424 - let key_bytes = k?; 1425 - if to_retain.contains(&*key_bytes) { 1426 - retained += 1; 1427 - } else { 1428 - self.records.remove(key_bytes)?; 1429 - removed += 1; 1430 - } 1431 - if i > 0 && (i % 100_000_000) == 0 { 1432 - log::info!("{i}: {retained} retained, {removed} removed."); 1433 - } 1434 - } 1435 - log::warn!("whew! that took {:?}", t0.elapsed()); 1436 - 1437 - Ok((removed, retained)) 1438 - } 1439 1357 } 1440 1358 1441 1359 impl StoreWriter<FjallBackground> for FjallWriter { ··· 1887 1805 tempfile::tempdir().unwrap(), 1888 1806 "offline test (no real jetstream endpoint)".to_string(), 1889 1807 false, 1890 - FjallConfig { 1891 - temp: true, 1892 - ..Default::default() 1893 - }, 1808 + FjallConfig { temp: true }, 1894 1809 ) 1895 1810 .unwrap(); 1896 1811 (read, write)