Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm
0

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'main' into constellation/eat-rocks

and update eat-rocks in one go (oops)

+956 -396
+11 -1
.github/workflows/checks.yml
··· 28 28 - name: get nightly toolchain for jetstream fmt 29 29 run: rustup toolchain install nightly --allow-downgrade -c rustfmt 30 30 - name: fmt 31 - run: cargo fmt --package constellation --package links --package pocket --package quasar --package slingshot --package spacedust --package ufos -- --check 31 + run: | 32 + cargo fmt \ 33 + --package constellation \ 34 + --package links \ 35 + --package pocket \ 36 + --package quasar \ 37 + --package slingshot \ 38 + --package spacedust \ 39 + --package ufos \ 40 + -- \ 41 + --check 32 42 - name: fmt jetstream (nightly) 33 43 run: cargo +nightly fmt --package jetstream -- --check 34 44 - name: clippy
+145 -125
Cargo.lock
··· 18 18 checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" 19 19 20 20 [[package]] 21 - name = "ahash" 22 - version = "0.8.11" 23 - source = "registry+https://github.com/rust-lang/crates.io-index" 24 - checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" 25 - dependencies = [ 26 - "cfg-if", 27 - "once_cell", 28 - "version_check", 29 - "zerocopy 0.7.35", 30 - ] 31 - 32 - [[package]] 33 21 name = "aho-corasick" 34 22 version = "1.1.3" 35 23 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 185 173 "nom", 186 174 "num-traits", 187 175 "rusticata-macros", 188 - "thiserror 2.0.16", 176 + "thiserror 2.0.18", 189 177 "time", 190 178 ] 191 179 ··· 737 725 738 726 [[package]] 739 727 name = "cardinality-estimator-safe" 740 - version = "4.0.2" 728 + version = "4.0.3" 741 729 source = "registry+https://github.com/rust-lang/crates.io-index" 742 - checksum = "dc9887b4092040ea9a416fc3de9769ee7783e3cd5c168c941e6a8de69723b971" 730 + checksum = "3879e0b6ebe0bef99874ab3942caee80365d00cf686b93a7cc9c0c9cb3a9f8e7" 743 731 dependencies = [ 744 732 "digest", 745 733 "enum_dispatch", ··· 971 959 "links", 972 960 "mediatype", 973 961 "metrics", 974 - "metrics-exporter-prometheus 0.16.2", 962 + "metrics-exporter-prometheus 0.18.3", 975 963 "metrics-process", 964 + "metrics-util", 976 965 "num-format", 977 966 "ratelimit", 978 967 "rocksdb", ··· 980 969 "serde_json", 981 970 "serde_with", 982 971 "tempfile", 972 + "thiserror 2.0.18", 983 973 "tinyjson", 984 974 "tokio", 985 975 "tokio-util", ··· 1377 1367 "slog-bunyan", 1378 1368 "slog-json", 1379 1369 "slog-term", 1380 - "thiserror 2.0.16", 1370 + "thiserror 2.0.18", 1381 1371 "tokio", 1382 1372 "tokio-rustls 0.25.0", 1383 1373 "toml", ··· 1415 1405 1416 1406 [[package]] 1417 1407 name = "eat-rocks" 1418 - version = "0.1.1" 1408 + version = "0.2.0" 1419 1409 source = "registry+https://github.com/rust-lang/crates.io-index" 1420 - checksum = 
"19a562da0f74abda90eb1cc96f7adf23a33ce1c71a8d714ae93afa27d33a7a31" 1410 + checksum = "39085e0daac22aea54faf9c0b92d5a391143cc24f9b2fee8fac4cf2fdc56f7b2" 1421 1411 dependencies = [ 1422 1412 "crc32c", 1423 1413 "futures", 1424 1414 "object_store", 1425 - "thiserror 2.0.16", 1415 + "thiserror 2.0.18", 1426 1416 "tokio", 1427 1417 "tracing", 1428 1418 ] ··· 1476 1466 ] 1477 1467 1478 1468 [[package]] 1469 + name = "endian-type" 1470 + version = "0.1.2" 1471 + source = "registry+https://github.com/rust-lang/crates.io-index" 1472 + checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" 1473 + 1474 + [[package]] 1479 1475 name = "enum-as-inner" 1480 1476 version = "0.6.1" 1481 1477 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1560 1556 ] 1561 1557 1562 1558 [[package]] 1559 + name = "evmap" 1560 + version = "11.0.0" 1561 + source = "registry+https://github.com/rust-lang/crates.io-index" 1562 + checksum = "1b8874945f036109c72242964c1174cf99434e30cfa45bf45fedc983f50046f8" 1563 + dependencies = [ 1564 + "hashbag", 1565 + "left-right", 1566 + "smallvec", 1567 + ] 1568 + 1569 + [[package]] 1563 1570 name = "fallible-iterator" 1564 1571 version = "0.3.0" 1565 1572 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1799 1806 "mea", 1800 1807 "parking_lot", 1801 1808 "pin-project", 1802 - "rand 0.9.1", 1809 + "rand 0.9.3", 1803 1810 "serde", 1804 1811 "tracing", 1805 1812 "twox-hash", ··· 2054 2061 ] 2055 2062 2056 2063 [[package]] 2064 + name = "hashbag" 2065 + version = "0.1.13" 2066 + source = "registry+https://github.com/rust-lang/crates.io-index" 2067 + checksum = "7040a10f52cba493ddb09926e15d10a9d8a28043708a405931fe4c6f19fac064" 2068 + 2069 + [[package]] 2057 2070 name = "hashbrown" 2058 2071 version = "0.12.3" 2059 2072 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2174 2187 "idna", 2175 2188 "ipnet", 2176 2189 "once_cell", 2177 - "rand 0.9.1", 2190 + "rand 0.9.3", 2178 2191 "ring", 
2179 - "thiserror 2.0.16", 2192 + "thiserror 2.0.18", 2180 2193 "tinyvec", 2181 2194 "tokio", 2182 2195 "tracing", ··· 2196 2209 "moka", 2197 2210 "once_cell", 2198 2211 "parking_lot", 2199 - "rand 0.9.1", 2212 + "rand 0.9.3", 2200 2213 "resolv-conf", 2201 2214 "smallvec", 2202 - "thiserror 2.0.16", 2215 + "thiserror 2.0.18", 2203 2216 "tokio", 2204 2217 "tracing", 2205 2218 ] ··· 2307 2320 2308 2321 [[package]] 2309 2322 name = "hyper" 2310 - version = "1.6.0" 2323 + version = "1.9.0" 2311 2324 source = "registry+https://github.com/rust-lang/crates.io-index" 2312 - checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80" 2325 + checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca" 2313 2326 dependencies = [ 2327 + "atomic-waker", 2314 2328 "bytes", 2315 2329 "futures-channel", 2316 - "futures-util", 2330 + "futures-core", 2317 2331 "h2", 2318 2332 "http", 2319 2333 "http-body", ··· 2697 2711 "metrics", 2698 2712 "serde", 2699 2713 "serde_json", 2700 - "thiserror 2.0.16", 2714 + "thiserror 2.0.18", 2701 2715 "tokio", 2702 2716 "tokio-tungstenite 0.26.2", 2703 2717 "url", ··· 2843 2857 checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" 2844 2858 2845 2859 [[package]] 2860 + name = "left-right" 2861 + version = "0.11.7" 2862 + source = "registry+https://github.com/rust-lang/crates.io-index" 2863 + checksum = "0f0c21e4c8ff95f487fb34e6f9182875f42c84cef966d29216bf115d9bba835a" 2864 + dependencies = [ 2865 + "crossbeam-utils", 2866 + "loom", 2867 + "slab", 2868 + ] 2869 + 2870 + [[package]] 2846 2871 name = "libc" 2847 2872 version = "0.2.174" 2848 2873 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2935 2960 name = "links" 2936 2961 version = "0.1.0" 2937 2962 dependencies = [ 2938 - "anyhow", 2939 2963 "fluent-uri", 2940 - "nom", 2941 - "thiserror 2.0.16", 2964 + "thiserror 2.0.18", 2942 2965 "tinyjson", 2943 2966 ] 2944 2967 ··· 3125 3148 3126 3149 [[package]] 3127 3150 name 
= "metrics" 3128 - version = "0.24.2" 3151 + version = "0.24.5" 3129 3152 source = "registry+https://github.com/rust-lang/crates.io-index" 3130 - checksum = "25dea7ac8057892855ec285c440160265225438c3c45072613c25a4b26e98ef5" 3153 + checksum = "ff56c2e7dce6bd462e3b8919986a617027481b1dcc703175b58cf9dd98a2f071" 3131 3154 dependencies = [ 3132 - "ahash", 3133 3155 "portable-atomic", 3156 + "rapidhash", 3134 3157 ] 3135 3158 3136 3159 [[package]] 3137 3160 name = "metrics-exporter-prometheus" 3138 - version = "0.16.2" 3161 + version = "0.17.2" 3139 3162 source = "registry+https://github.com/rust-lang/crates.io-index" 3140 - checksum = "dd7399781913e5393588a8d8c6a2867bf85fb38eaf2502fdce465aad2dc6f034" 3163 + checksum = "2b166dea96003ee2531cf14833efedced545751d800f03535801d833313f8c15" 3141 3164 dependencies = [ 3142 3165 "base64 0.22.1", 3143 3166 "http-body-util", 3144 3167 "hyper", 3168 + "hyper-rustls", 3145 3169 "hyper-util", 3146 3170 "indexmap 2.11.4", 3147 3171 "ipnet", 3148 3172 "metrics", 3149 - "metrics-util 0.19.0", 3173 + "metrics-util", 3150 3174 "quanta", 3151 - "thiserror 1.0.69", 3175 + "thiserror 2.0.18", 3152 3176 "tokio", 3153 3177 "tracing", 3154 3178 ] 3155 3179 3156 3180 [[package]] 3157 3181 name = "metrics-exporter-prometheus" 3158 - version = "0.17.2" 3182 + version = "0.18.3" 3159 3183 source = "registry+https://github.com/rust-lang/crates.io-index" 3160 - checksum = "2b166dea96003ee2531cf14833efedced545751d800f03535801d833313f8c15" 3184 + checksum = "1db0d8f1fc9e62caebd0319e11eaec5822b0186c171568f0480b46a0137f9108" 3161 3185 dependencies = [ 3162 3186 "base64 0.22.1", 3187 + "evmap", 3163 3188 "http-body-util", 3164 3189 "hyper", 3165 - "hyper-rustls", 3166 3190 "hyper-util", 3167 3191 "indexmap 2.11.4", 3168 3192 "ipnet", 3169 3193 "metrics", 3170 - "metrics-util 0.20.0", 3194 + "metrics-util", 3171 3195 "quanta", 3172 - "thiserror 2.0.16", 3196 + "thiserror 2.0.18", 3173 3197 "tokio", 3174 3198 "tracing", 3175 3199 ] ··· 3192 3216 3193 3217 
[[package]] 3194 3218 name = "metrics-util" 3195 - version = "0.19.0" 3219 + version = "0.20.3" 3196 3220 source = "registry+https://github.com/rust-lang/crates.io-index" 3197 - checksum = "dbd4884b1dd24f7d6628274a2f5ae22465c337c5ba065ec9b6edccddf8acc673" 3221 + checksum = "9e56997f084e57b045edf17c3ed8ba7f9f779c670df8206dfd1c736f4c02dc4a" 3198 3222 dependencies = [ 3223 + "aho-corasick", 3199 3224 "crossbeam-epoch", 3200 3225 "crossbeam-utils", 3201 - "hashbrown 0.15.2", 3202 - "metrics", 3203 - "quanta", 3204 - "rand 0.8.5", 3205 - "rand_xoshiro 0.6.0", 3206 - "sketches-ddsketch", 3207 - ] 3208 - 3209 - [[package]] 3210 - name = "metrics-util" 3211 - version = "0.20.0" 3212 - source = "registry+https://github.com/rust-lang/crates.io-index" 3213 - checksum = "fe8db7a05415d0f919ffb905afa37784f71901c9a773188876984b4f769ab986" 3214 - dependencies = [ 3215 - "crossbeam-epoch", 3216 - "crossbeam-utils", 3217 - "hashbrown 0.15.2", 3226 + "hashbrown 0.16.1", 3227 + "indexmap 2.11.4", 3218 3228 "metrics", 3229 + "ordered-float", 3219 3230 "quanta", 3220 - "rand 0.9.1", 3221 - "rand_xoshiro 0.7.0", 3231 + "radix_trie", 3232 + "rand 0.9.3", 3233 + "rand_xoshiro", 3234 + "rapidhash", 3222 3235 "sketches-ddsketch", 3223 3236 ] 3224 3237 ··· 3351 3364 "security-framework 2.11.1", 3352 3365 "security-framework-sys", 3353 3366 "tempfile", 3367 + ] 3368 + 3369 + [[package]] 3370 + name = "nibble_vec" 3371 + version = "0.1.0" 3372 + source = "registry+https://github.com/rust-lang/crates.io-index" 3373 + checksum = "77a5d83df9f36fe23f0c3648c6bbb8b0298bb5f1939c8f2704431371f4b84d43" 3374 + dependencies = [ 3375 + "smallvec", 3354 3376 ] 3355 3377 3356 3378 [[package]] ··· 3485 3507 "serde", 3486 3508 "serde_json", 3487 3509 "serde_urlencoded", 3488 - "thiserror 2.0.16", 3510 + "thiserror 2.0.18", 3489 3511 "tokio", 3490 3512 "tracing", 3491 3513 "url", ··· 3526 3548 3527 3549 [[package]] 3528 3550 name = "openssl" 3529 - version = "0.10.72" 3551 + version = "0.10.79" 3530 3552 source 
= "registry+https://github.com/rust-lang/crates.io-index" 3531 - checksum = "fedfea7d58a1f73118430a55da6a286e7b044961736ce96a16a17068ea25e5da" 3553 + checksum = "bf0b434746ee2832f4f0baf10137e1cabb18cbe6912c69e2e33263c45250f542" 3532 3554 dependencies = [ 3533 3555 "bitflags", 3534 3556 "cfg-if", 3535 3557 "foreign-types", 3536 3558 "libc", 3537 - "once_cell", 3538 3559 "openssl-macros", 3539 3560 "openssl-sys", 3540 3561 ] ··· 3567 3588 3568 3589 [[package]] 3569 3590 name = "openssl-sys" 3570 - version = "0.9.107" 3591 + version = "0.9.115" 3571 3592 source = "registry+https://github.com/rust-lang/crates.io-index" 3572 - checksum = "8288979acd84749c744a9014b4382d42b8f7b2592847b5afb2ed29e5d16ede07" 3593 + checksum = "158fe5b292746440aa6e7a7e690e55aeb72d41505e2804c23c6973ad0e9c9781" 3573 3594 dependencies = [ 3574 3595 "cc", 3575 3596 "libc", ··· 3579 3600 ] 3580 3601 3581 3602 [[package]] 3603 + name = "ordered-float" 3604 + version = "5.3.0" 3605 + source = "registry+https://github.com/rust-lang/crates.io-index" 3606 + checksum = "b7d950ca161dc355eaf28f82b11345ed76c6e1f6eb1f4f4479e0323b9e2fbd0e" 3607 + dependencies = [ 3608 + "num-traits", 3609 + ] 3610 + 3611 + [[package]] 3582 3612 name = "p256" 3583 3613 version = "0.13.2" 3584 3614 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3721 3751 "rusqlite", 3722 3752 "serde", 3723 3753 "serde_json", 3724 - "thiserror 2.0.16", 3754 + "thiserror 2.0.18", 3725 3755 "tokio", 3726 3756 "tracing-subscriber", 3727 3757 ] ··· 3764 3794 "smallvec", 3765 3795 "sync_wrapper", 3766 3796 "tempfile", 3767 - "thiserror 2.0.16", 3797 + "thiserror 2.0.18", 3768 3798 "tokio", 3769 3799 "tokio-rustls 0.26.2", 3770 3800 "tokio-stream", ··· 3808 3838 "serde_json", 3809 3839 "serde_urlencoded", 3810 3840 "serde_yaml", 3811 - "thiserror 2.0.16", 3841 + "thiserror 2.0.18", 3812 3842 "tokio", 3813 3843 ] 3814 3844 ··· 3827 3857 "quote", 3828 3858 "regex", 3829 3859 "syn", 3830 - "thiserror 2.0.16", 3860 + "thiserror 
2.0.18", 3831 3861 ] 3832 3862 3833 3863 [[package]] ··· 3857 3887 source = "registry+https://github.com/rust-lang/crates.io-index" 3858 3888 checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" 3859 3889 dependencies = [ 3860 - "zerocopy 0.8.24", 3890 + "zerocopy", 3861 3891 ] 3862 3892 3863 3893 [[package]] ··· 3986 4016 "rustc-hash 2.1.1", 3987 4017 "rustls 0.23.31", 3988 4018 "socket2 0.5.9", 3989 - "thiserror 2.0.16", 4019 + "thiserror 2.0.18", 3990 4020 "tokio", 3991 4021 "tracing", 3992 4022 "web-time", ··· 4001 4031 "bytes", 4002 4032 "getrandom 0.3.3", 4003 4033 "lru-slab", 4004 - "rand 0.9.1", 4034 + "rand 0.9.3", 4005 4035 "ring", 4006 4036 "rustc-hash 2.1.1", 4007 4037 "rustls 0.23.31", 4008 4038 "rustls-pki-types", 4009 4039 "slab", 4010 - "thiserror 2.0.16", 4040 + "thiserror 2.0.18", 4011 4041 "tinyvec", 4012 4042 "tracing", 4013 4043 "web-time", ··· 4049 4079 checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" 4050 4080 4051 4081 [[package]] 4082 + name = "radix_trie" 4083 + version = "0.2.1" 4084 + source = "registry+https://github.com/rust-lang/crates.io-index" 4085 + checksum = "c069c179fcdc6a2fe24d8d18305cf085fdbd4f922c041943e203685d6a1c58fd" 4086 + dependencies = [ 4087 + "endian-type", 4088 + "nibble_vec", 4089 + ] 4090 + 4091 + [[package]] 4052 4092 name = "rand" 4053 4093 version = "0.8.5" 4054 4094 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 4061 4101 4062 4102 [[package]] 4063 4103 name = "rand" 4064 - version = "0.9.1" 4104 + version = "0.9.3" 4065 4105 source = "registry+https://github.com/rust-lang/crates.io-index" 4066 - checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" 4106 + checksum = "7ec095654a25171c2124e9e3393a930bddbffdc939556c914957a4c3e0a87166" 4067 4107 dependencies = [ 4068 4108 "rand_chacha 0.9.0", 4069 4109 "rand_core 0.9.3", ··· 4126 4166 4127 4167 [[package]] 4128 4168 name = "rand_xoshiro" 4129 - version = "0.6.0" 
4169 + version = "0.7.0" 4130 4170 source = "registry+https://github.com/rust-lang/crates.io-index" 4131 - checksum = "6f97cdb2a36ed4183de61b2f824cc45c9f1037f28afe0a322e9fff4c108b5aaa" 4171 + checksum = "f703f4665700daf5512dcca5f43afa6af89f09db47fb56be587f80636bda2d41" 4132 4172 dependencies = [ 4133 - "rand_core 0.6.4", 4173 + "rand_core 0.9.3", 4134 4174 ] 4135 4175 4136 4176 [[package]] 4137 - name = "rand_xoshiro" 4138 - version = "0.7.0" 4177 + name = "rapidhash" 4178 + version = "4.4.1" 4139 4179 source = "registry+https://github.com/rust-lang/crates.io-index" 4140 - checksum = "f703f4665700daf5512dcca5f43afa6af89f09db47fb56be587f80636bda2d41" 4180 + checksum = "b5e48930979c155e2f33aa36ab3119b5ee81332beb6482199a8ecd6029b80b59" 4141 4181 dependencies = [ 4142 - "rand_core 0.9.3", 4182 + "rustversion", 4143 4183 ] 4144 4184 4145 4185 [[package]] ··· 4755 4795 "percent-encoding", 4756 4796 "ryu", 4757 4797 "serde", 4758 - "thiserror 2.0.16", 4798 + "thiserror 2.0.18", 4759 4799 ] 4760 4800 4761 4801 [[package]] ··· 4925 4965 "rustls 0.23.31", 4926 4966 "serde", 4927 4967 "serde_json", 4928 - "thiserror 2.0.16", 4968 + "thiserror 2.0.18", 4929 4969 "time", 4930 4970 "tokio", 4931 4971 "tokio-util", ··· 5036 5076 "log", 5037 5077 "metrics", 5038 5078 "metrics-exporter-prometheus 0.17.2", 5039 - "rand 0.9.1", 5079 + "rand 0.9.3", 5040 5080 "schemars", 5041 5081 "semver", 5042 5082 "serde", 5043 5083 "serde_json", 5044 5084 "serde_qs", 5045 - "thiserror 2.0.16", 5085 + "thiserror 2.0.18", 5046 5086 "tinyjson", 5047 5087 "tokio", 5048 5088 "tokio-tungstenite 0.27.0", ··· 5191 5231 5192 5232 [[package]] 5193 5233 name = "thiserror" 5194 - version = "2.0.16" 5234 + version = "2.0.18" 5195 5235 source = "registry+https://github.com/rust-lang/crates.io-index" 5196 - checksum = "3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0" 5236 + checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" 5197 5237 dependencies = [ 5198 - 
"thiserror-impl 2.0.16", 5238 + "thiserror-impl 2.0.18", 5199 5239 ] 5200 5240 5201 5241 [[package]] ··· 5211 5251 5212 5252 [[package]] 5213 5253 name = "thiserror-impl" 5214 - version = "2.0.16" 5254 + version = "2.0.18" 5215 5255 source = "registry+https://github.com/rust-lang/crates.io-index" 5216 - checksum = "6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960" 5256 + checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" 5217 5257 dependencies = [ 5218 5258 "proc-macro2", 5219 5259 "quote", ··· 5619 5659 "httparse", 5620 5660 "log", 5621 5661 "native-tls", 5622 - "rand 0.9.1", 5662 + "rand 0.9.3", 5623 5663 "sha1", 5624 - "thiserror 2.0.16", 5664 + "thiserror 2.0.18", 5625 5665 "url", 5626 5666 "utf-8", 5627 5667 ] ··· 5637 5677 "http", 5638 5678 "httparse", 5639 5679 "log", 5640 - "rand 0.9.1", 5680 + "rand 0.9.3", 5641 5681 "sha1", 5642 - "thiserror 2.0.16", 5682 + "thiserror 2.0.18", 5643 5683 "utf-8", 5644 5684 ] 5645 5685 ··· 5649 5689 source = "registry+https://github.com/rust-lang/crates.io-index" 5650 5690 checksum = "8b907da542cbced5261bd3256de1b3a1bf340a3d37f93425a07362a1d687de56" 5651 5691 dependencies = [ 5652 - "rand 0.9.1", 5692 + "rand 0.9.3", 5653 5693 ] 5654 5694 5655 5695 [[package]] ··· 5686 5726 "serde_qs", 5687 5727 "sha2", 5688 5728 "tempfile", 5689 - "thiserror 2.0.16", 5729 + "thiserror 2.0.18", 5690 5730 "tikv-jemallocator", 5691 5731 "tokio", 5692 5732 "tokio-util", ··· 6500 6540 "nom", 6501 6541 "oid-registry", 6502 6542 "rusticata-macros", 6503 - "thiserror 2.0.16", 6543 + "thiserror 2.0.18", 6504 6544 "time", 6505 6545 ] 6506 6546 ··· 6545 6585 6546 6586 [[package]] 6547 6587 name = "zerocopy" 6548 - version = "0.7.35" 6549 - source = "registry+https://github.com/rust-lang/crates.io-index" 6550 - checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" 6551 - dependencies = [ 6552 - "zerocopy-derive 0.7.35", 6553 - ] 6554 - 6555 - [[package]] 6556 - name = "zerocopy" 6557 
6588 version = "0.8.24" 6558 6589 source = "registry+https://github.com/rust-lang/crates.io-index" 6559 6590 checksum = "2586fea28e186957ef732a5f8b3be2da217d65c5969d4b1e17f973ebbe876879" 6560 6591 dependencies = [ 6561 - "zerocopy-derive 0.8.24", 6562 - ] 6563 - 6564 - [[package]] 6565 - name = "zerocopy-derive" 6566 - version = "0.7.35" 6567 - source = "registry+https://github.com/rust-lang/crates.io-index" 6568 - checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" 6569 - dependencies = [ 6570 - "proc-macro2", 6571 - "quote", 6572 - "syn", 6592 + "zerocopy-derive", 6573 6593 ] 6574 6594 6575 6595 [[package]]
+4
Cargo.toml
··· 14 14 15 15 [workspace.dependencies] 16 16 clap = { version = "4.5.56", features = ["derive", "env"] } 17 + metrics = "0.24.5" 18 + metrics-exporter-prometheus = { version = "0.18.3", default-features = false, features = ["http-listener"] } 19 + metrics-util = "0.20.3" 20 + thiserror = "2.0.18" 17 21 tracing = "0.1.44" 18 22 tracing-subscriber = { version = "0.3.23", features = ["env-filter"] }
+7 -5
constellation/Cargo.toml
··· 13 13 bincode = "1.3.3" 14 14 clap = { workspace = true } 15 15 ctrlc = "3.4.5" 16 - eat-rocks = { version = "0.1.1", optional = true, features = ["easy"] } 16 + eat-rocks = { version = "0.2.0", optional = true, features = ["easy"] } 17 17 flume = { version = "0.11.1", default-features = false } 18 18 fs4 = { version = "0.12.0", features = ["sync"] } 19 19 headers-accept = "0.1.4" 20 20 links = { path = "../links" } 21 21 mediatype = "0.19.18" 22 - metrics = "0.24.1" 23 - metrics-exporter-prometheus = { version = "0.16.1", default-features = false, features = ["http-listener"] } 22 + metrics = { workspace = true } 23 + metrics-exporter-prometheus = { workspace = true } 24 24 metrics-process = "2.4.0" 25 + metrics-util = { workspace = true } 25 26 num-format = "0.4.4" 26 27 ratelimit = "0.10.0" 27 28 rocksdb = { version = "0.23.0", optional = true } 28 29 serde = { version = "1.0.215", features = ["derive"] } 29 30 serde_json = "1.0.139" 30 31 serde_with = { version = "3.12.0", features = ["hex"] } 32 + thiserror = { workspace = true } 31 33 tinyjson = "2.5.1" 32 34 tokio-util = "0.7.13" 33 35 tower-http = { version = "0.6.2", features = ["cors"] } 34 - tracing = { version = "0.1.44", workspace = true } 35 - tracing-subscriber = { version = "0.3.23", workspace = true } 36 + tracing = { workspace = true } 37 + tracing-subscriber = { workspace = true } 36 38 zstd = "0.13.2" 37 39 38 40 [dependencies.tokio]
+135 -30
constellation/src/bin/main.rs
··· 1 1 use anyhow::{bail, Result}; 2 2 use clap::{Parser, ValueEnum}; 3 + use metrics::{describe_counter, describe_gauge, describe_histogram, Unit}; 3 4 use metrics_exporter_prometheus::PrometheusBuilder; 4 5 use std::net::SocketAddr; 5 6 use std::num::NonZero; ··· 60 61 /// Saved jsonl from jetstream to use instead of a live subscription 61 62 #[arg(short, long)] 62 63 fixture: Option<PathBuf>, 63 - /// run a scan across the target id table and write all key -> ids to id -> keys 64 + /// Don't change the database jetstream cursor when using a fixture 65 + #[arg(long, requires("fixture"))] 66 + fixture_preserve_cursor: bool, 67 + /// fix the constellation start date (funny previous bug oops) 64 68 #[arg(long, action)] 65 - repair_target_ids: bool, 69 + reset_db_start: bool, 66 70 } 67 71 68 72 #[derive(Debug, Clone, ValueEnum)] ··· 88 92 println!("starting with storage backend: {:?}...", args.backend); 89 93 90 94 let fixture = args.fixture; 95 + let fixture_preserve_cursor = args.fixture_preserve_cursor; 91 96 if let Some(ref p) = fixture { 92 - println!("using fixture at {p:?}..."); 97 + println!("using fixture at {p:?}, preserving cursor? 
{fixture_preserve_cursor:?}..."); 93 98 } 94 99 95 100 let stream = jetstream_url(&args.jetstream); ··· 105 110 StorageBackend::Memory => run( 106 111 MemStorage::new(), 107 112 fixture, 113 + fixture_preserve_cursor, 108 114 None, 109 115 args.did_web_domain, 110 116 stream, ··· 128 134 } 129 135 println!("rocks ready."); 130 136 std::thread::scope(|s| { 131 - if args.repair_target_ids { 132 - let rocks = rocks.clone(); 133 - let stay_alive = stay_alive.clone(); 134 - s.spawn(move || { 135 - let rep = rocks.run_repair(time::Duration::from_millis(0), stay_alive); 136 - eprintln!("repair finished: {rep:?}"); 137 - rep 138 - }); 137 + if args.reset_db_start { 138 + let res = rocks.reset_start(); 139 + eprintln!("reset start finished: {res:?}"); 139 140 } 140 141 s.spawn(|| { 141 142 let r = run( 142 143 rocks, 143 144 fixture, 145 + fixture_preserve_cursor, 144 146 args.data, 145 147 args.did_web_domain, 146 148 stream, ··· 163 165 fn run( 164 166 mut storage: impl LinkStorage, 165 167 fixture: Option<PathBuf>, 168 + fixture_preserve_cursor: bool, 166 169 data_dir: Option<PathBuf>, 167 170 did_web_domain: Option<String>, 168 171 stream: String, ··· 184 187 } 185 188 })?; 186 189 190 + // Install metrics server only if requested 191 + if collect_metrics { 192 + install_metrics_server(metrics_bind)?; 193 + } 194 + 187 195 let qsize = Arc::new(AtomicU32::new(0)); 188 196 189 197 thread::scope(|s| { ··· 194 202 let stay_alive = stay_alive.clone(); 195 203 let staying_alive = stay_alive.clone(); 196 204 move || { 197 - if let Err(e) = consume(storage, qsize, fixture, stream, staying_alive) { 205 + if let Err(e) = consume( 206 + storage, 207 + qsize, 208 + fixture, 209 + fixture_preserve_cursor, 210 + stream, 211 + staying_alive, 212 + ) { 198 213 eprintln!("jetstream finished with error: {e}"); 199 214 } 200 215 stay_alive.drop_guard(); ··· 212 227 .enable_all() 213 228 .build() 214 229 .expect("axum startup") 215 - .block_on(async { 216 - // Install metrics server only 
if requested 217 - if collect_metrics { 218 - install_metrics_server(metrics_bind)?; 219 - } 220 - serve(readable, bind, did_web_domain, staying_alive).await 221 - }) 230 + .block_on(serve(readable, bind, did_web_domain, staying_alive)) 222 231 .unwrap(); 223 232 stay_alive.drop_guard(); 224 233 } ··· 231 240 let check_alive = stay_alive.clone(); 232 241 233 242 let process_collector = metrics_process::Collector::default(); 234 - process_collector.describe(); 235 - metrics::describe_gauge!( 236 - "storage_available", 237 - metrics::Unit::Bytes, 238 - "available to be allocated" 239 - ); 240 - metrics::describe_gauge!( 241 - "storage_free", 242 - metrics::Unit::Bytes, 243 - "unused bytes in filesystem" 244 - ); 245 243 if let Some(ref p) = data_dir { 246 244 if let Err(e) = fs4::available_space(p) { 247 245 eprintln!("fs4 failed to get available space. may not be supported here? space metrics may be absent. e: {e:?}"); ··· 289 287 290 288 fn install_metrics_server(metrics_bind: SocketAddr) -> Result<()> { 291 289 println!("installing metrics server..."); 290 + #[expect( 291 + deprecated, 292 + reason = "would change counters to _total suffix, needs dash updates" 293 + )] 292 294 PrometheusBuilder::new() 295 + .idle_timeout( 296 + metrics_util::MetricKindMask::ALL, 297 + Some(time::Duration::from_secs(900)), // 15 min 298 + ) 293 299 .set_quantiles(&[0.5, 0.9, 0.99, 1.0])? 294 300 .set_bucket_duration(time::Duration::from_secs(30))? 295 301 .set_bucket_count(NonZero::new(10).unwrap()) // count * duration = 5 mins. stuff doesn't happen that fast here. 296 302 .set_enable_unit_suffix(true) 297 303 .with_http_listener(metrics_bind) 298 304 .install()?; 305 + describe_metrics(); 299 306 println!("metrics server installed! 
listening at {metrics_bind:?}"); 300 307 Ok(()) 308 + } 309 + 310 + fn describe_metrics() { 311 + metrics_process::Collector::default().describe(); 312 + describe_gauge!( 313 + "storage_available", 314 + Unit::Bytes, 315 + "available to be allocated" 316 + ); 317 + describe_gauge!("storage_free", Unit::Bytes, "unused bytes in filesystem"); 318 + describe_counter!( 319 + "jetstream_connnect", 320 + Unit::Count, 321 + "attempts to connect to a jetstream server" 322 + ); 323 + describe_counter!( 324 + "jetstream_read", 325 + Unit::Count, 326 + "attempts to read an event from jetstream" 327 + ); 328 + describe_counter!( 329 + "jetstream_read_fail", 330 + Unit::Count, 331 + "failures to read events from jetstream" 332 + ); 333 + describe_counter!( 334 + "jetstream_read_bytes", 335 + Unit::Bytes, 336 + "total received message bytes from jetstream" 337 + ); 338 + describe_counter!( 339 + "jetstream_read_bytes_decompressed", 340 + Unit::Bytes, 341 + "total decompressed message bytes from jetstream" 342 + ); 343 + describe_histogram!( 344 + "jetstream_read_bytes_decompressed", 345 + Unit::Bytes, 346 + "decompressed size of jetstream messages" 347 + ); 348 + describe_counter!( 349 + "jetstream_events", 350 + Unit::Count, 351 + "valid json messages received" 352 + ); 353 + describe_histogram!( 354 + "jetstream_events_queued", 355 + Unit::Count, 356 + "event messages waiting in queue" 357 + ); 358 + describe_gauge!( 359 + "jetstream_cursor_age", 360 + Unit::Microseconds, 361 + "microseconds between our clock and the jetstream event's time_us" 362 + ); 363 + describe_counter!( 364 + "consumer_events_non_actionable", 365 + Unit::Count, 366 + "count of non-actionable events" 367 + ); 368 + describe_counter!( 369 + "consumer_events_actionable", 370 + Unit::Count, 371 + "count of action by type. 
*all* atproto record delete events are included" 372 + ); 373 + describe_counter!( 374 + "consumer_events_actionable_links", 375 + Unit::Count, 376 + "total links encountered" 377 + ); 378 + describe_histogram!( 379 + "consumer_events_actionable_links", 380 + Unit::Count, 381 + "number of links per message" 382 + ); 383 + #[cfg(feature = "rocks")] 384 + { 385 + describe_histogram!( 386 + "storage_rocksdb_read_seconds", 387 + Unit::Seconds, 388 + "duration of the read stage of actions" 389 + ); 390 + describe_histogram!( 391 + "storage_rocksdb_action_seconds", 392 + Unit::Seconds, 393 + "duration of read + write of actions" 394 + ); 395 + describe_counter!( 396 + "storage_rocksdb_batch_ops_total", 397 + Unit::Count, 398 + "total batched operations from actions" 399 + ); 400 + describe_histogram!( 401 + "storage_rocksdb_delete_account_ops", 402 + Unit::Count, 403 + "total batched ops for account deletions" 404 + ); 405 + } 301 406 } 302 407 303 408 #[cfg(test)]
+2 -1
constellation/src/bin/rocks-restore-from-backup.rs
··· 117 117 backup_id: Option<u32>, 118 118 concurrency: Option<usize>, 119 119 ) -> Result<()> { 120 - use eat_rocks::{public_bucket, restore, RestoreOptions}; 120 + use eat_rocks::{public_bucket, restore, RestoreOptions, TargetMode}; 121 121 use tokio::runtime::Runtime; 122 122 123 123 let rt = Runtime::new()?; ··· 130 130 RestoreOptions { 131 131 backup_id: backup_id.map(u64::from), 132 132 concurrency: concurrency.unwrap_or(RestoreOptions::default().concurrency), 133 + target_mode: TargetMode::CreateOrReplace, 133 134 ..Default::default() 134 135 }, 135 136 )
+1 -49
constellation/src/consumer/jetstream.rs
··· 1 1 use anyhow::{bail, Result}; 2 - use metrics::{ 3 - counter, describe_counter, describe_gauge, describe_histogram, gauge, histogram, Unit, 4 - }; 2 + use metrics::{counter, gauge, histogram}; 5 3 use std::io::{Cursor, ErrorKind, Read}; 6 4 use std::net::ToSocketAddrs; 7 5 use std::thread; ··· 19 17 stream: String, 20 18 staying_alive: CancellationToken, 21 19 ) -> Result<()> { 22 - describe_counter!( 23 - "jetstream_connnect", 24 - Unit::Count, 25 - "attempts to connect to a jetstream server" 26 - ); 27 - describe_counter!( 28 - "jetstream_read", 29 - Unit::Count, 30 - "attempts to read an event from jetstream" 31 - ); 32 - describe_counter!( 33 - "jetstream_read_fail", 34 - Unit::Count, 35 - "failures to read events from jetstream" 36 - ); 37 - describe_counter!( 38 - "jetstream_read_bytes", 39 - Unit::Bytes, 40 - "total received message bytes from jetstream" 41 - ); 42 - describe_counter!( 43 - "jetstream_read_bytes_decompressed", 44 - Unit::Bytes, 45 - "total decompressed message bytes from jetstream" 46 - ); 47 - describe_histogram!( 48 - "jetstream_read_bytes_decompressed", 49 - Unit::Bytes, 50 - "decompressed size of jetstream messages" 51 - ); 52 - describe_counter!( 53 - "jetstream_events", 54 - Unit::Count, 55 - "valid json messages received" 56 - ); 57 - describe_histogram!( 58 - "jetstream_events_queued", 59 - Unit::Count, 60 - "event messages waiting in queue" 61 - ); 62 - describe_gauge!( 63 - "jetstream_cursor_age", 64 - Unit::Microseconds, 65 - "microseconds between our clock and the jetstream event's time_us" 66 - ); 67 - 68 20 let dict = DecoderDictionary::copy(JETSTREAM_ZSTD_DICTIONARY); 69 21 let mut connect_retries = 0; 70 22 let mut latest_cursor = cursor;
+108 -27
constellation/src/consumer/mod.rs
··· 6 6 use anyhow::Result; 7 7 use jetstream::consume_jetstream; 8 8 use jsonl_file::consume_jsonl_file; 9 - use links::collect_links; 10 - use metrics::{counter, describe_counter, describe_histogram, histogram, Unit}; 9 + use links::{parse_any_link, record::walk_record, CollectedLink}; 10 + use metrics::{counter, histogram}; 11 11 use std::path::PathBuf; 12 12 use std::sync::atomic::{AtomicU32, Ordering}; 13 13 use std::sync::Arc; ··· 19 19 mut store: impl LinkStorage, 20 20 qsize: Arc<AtomicU32>, 21 21 fixture: Option<PathBuf>, 22 + fixture_preserve_cursor: bool, 22 23 stream: String, 23 24 staying_alive: CancellationToken, 24 25 ) -> Result<()> { 25 - describe_counter!( 26 - "consumer_events_non_actionable", 27 - Unit::Count, 28 - "count of non-actionable events" 29 - ); 30 - describe_counter!( 31 - "consumer_events_actionable", 32 - Unit::Count, 33 - "count of action by type. *all* atproto record delete events are included" 34 - ); 35 - describe_counter!( 36 - "consumer_events_actionable_links", 37 - Unit::Count, 38 - "total links encountered" 39 - ); 40 - describe_histogram!( 41 - "consumer_events_actionable_links", 42 - Unit::Count, 43 - "number of links per message" 44 - ); 45 - 26 + let mut fixture_cursor = None; 46 27 let (receiver, consumer_handle) = if let Some(f) = fixture { 47 28 let (sender, receiver) = flume::bounded(21); 29 + if fixture_preserve_cursor { 30 + fixture_cursor = store.get_cursor()?; 31 + if fixture_cursor.is_none() { 32 + anyhow::bail!( 33 + "--fixture-preserve-cursor was set but the database has no \ 34 + existing cursor to preserve. either drop the flag (cursor \ 35 + will be set to the last event in the fixture, current default \ 36 + behavior) or run a live jetstream session first." 
37 + ) 38 + } 39 + } 48 40 ( 49 41 receiver, 50 42 thread::spawn(move || consume_jsonl_file(f, sender)), 51 43 ) 52 44 } else { 53 - let (sender, receiver) = flume::bounded(32_768); // eek 45 + let (sender, receiver) = flume::bounded(1024); 54 46 let cursor = store.get_cursor().unwrap(); 55 47 ( 56 48 receiver, ··· 61 53 for update in receiver.iter() { 62 54 if let Some((action, ts)) = get_actionable(&update) { 63 55 { 64 - store.push(&action, ts).unwrap(); 56 + store.push(&action, fixture_cursor.unwrap_or(ts)).unwrap(); 65 57 qsize.store(receiver.len().try_into().unwrap(), Ordering::Relaxed); 66 58 } 67 59 } else { ··· 99 91 }; 100 92 match commit.get("operation")? { 101 93 JsonValue::String(op) if op == "create" => { 102 - let links = collect_links(commit.get("record")?); 94 + let mut links = vec![]; 95 + // 1. extract links (dids probably) from rkey, if there 96 + if let Some(target) = parse_any_link(rkey) { 97 + links.push(CollectedLink { 98 + path: ".".into(), 99 + target, 100 + }); 101 + } 102 + // 2. and from the record body 103 + walk_record("", commit.get("record")?, &mut links); 104 + 103 105 counter!("consumer_events_actionable", "action_type" => "create_links", "collection" => collection.clone()).increment(1); 104 106 histogram!("consumer_events_actionable_links", "action_type" => "create_links", "collection" => collection.clone()).record(links.len() as f64); 105 107 for link in &links { ··· 128 130 } 129 131 } 130 132 JsonValue::String(op) if op == "update" => { 131 - let links = collect_links(commit.get("record")?); 133 + let mut links = vec![]; 134 + // 1. extract links (dids probably) from rkey, if there 135 + if let Some(target) = parse_any_link(rkey) { 136 + links.push(CollectedLink { 137 + path: ".".into(), 138 + target, 139 + }); 140 + } 141 + // 2. 
and from the record body 142 + walk_record("", commit.get("record")?, &mut links); 143 + 132 144 counter!("consumer_events_actionable", "action_type" => "update_links", "collection" => collection.clone()).increment(1); 133 145 histogram!("consumer_events_actionable_links", "action_type" => "update_links", "collection" => collection.clone()).record(links.len() as f64); 134 146 for link in &links { ··· 334 346 Some(( 335 347 ActionableEvent::DeactivateAccount("did:plc:l4jb3hkq7lrblferbywxkiol".into()), 336 348 1736451745611273 349 + )) 350 + ) 351 + } 352 + 353 + #[test] 354 + fn test_create_vouch_indexes_did_rkey() { 355 + let rec = r#"{ 356 + "did":"did:plc:voucher", 357 + "time_us":1746460800000000, 358 + "kind":"commit", 359 + "commit":{"rev":"3lqrvouchcreate","operation":"create","collection":"sh.tangled.graph.vouch","rkey":"did:plc:vouchedfor","record":{ 360 + "$type":"sh.tangled.graph.vouch", 361 + "createdAt":"2026-05-05T12:00:00.000Z" 362 + }} 363 + }"#.parse().unwrap(); 364 + let action = get_actionable(&rec); 365 + assert_eq!( 366 + action, 367 + Some(( 368 + ActionableEvent::CreateLinks { 369 + record_id: RecordId { 370 + did: "did:plc:voucher".into(), 371 + collection: "sh.tangled.graph.vouch".into(), 372 + rkey: "did:plc:vouchedfor".into(), 373 + }, 374 + links: vec![CollectedLink { 375 + path: ".".into(), 376 + target: Link::Did("did:plc:vouchedfor".into()), 377 + }], 378 + }, 379 + 1746460800000000 380 + )) 381 + ) 382 + } 383 + 384 + #[test] 385 + fn test_update_vouch_indexes_did_rkey() { 386 + let rec = r#"{ 387 + "did":"did:plc:voucher", 388 + "time_us":1746460800000001, 389 + "kind":"commit", 390 + "commit":{"rev":"3lqrvouchupdate","operation":"update","collection":"sh.tangled.graph.vouch","rkey":"did:plc:vouchedfor","record":{ 391 + "$type":"sh.tangled.graph.vouch", 392 + "createdAt":"2026-05-05T12:00:00.000Z", 393 + "reason":"https://atproto.com" 394 + }} 395 + }"#.parse().unwrap(); 396 + let action = get_actionable(&rec); 397 + assert_eq!( 398 
+ action, 399 + Some(( 400 + ActionableEvent::UpdateLinks { 401 + record_id: RecordId { 402 + did: "did:plc:voucher".into(), 403 + collection: "sh.tangled.graph.vouch".into(), 404 + rkey: "did:plc:vouchedfor".into(), 405 + }, 406 + new_links: vec![ 407 + CollectedLink { 408 + path: ".".into(), 409 + target: Link::Did("did:plc:vouchedfor".into()), 410 + }, 411 + CollectedLink { 412 + path: ".reason".into(), 413 + target: Link::Uri("https://atproto.com".into()), 414 + }, 415 + ], 416 + }, 417 + 1746460800000001 337 418 )) 338 419 ) 339 420 }
+21 -27
constellation/src/server/mod.rs
··· 22 22 23 23 mod acceptable; 24 24 mod filters; 25 + mod link_source; 25 26 26 27 use acceptable::{acceptable, ExtractAccept}; 28 + use link_source::{parse_link_source, parse_path}; 27 29 28 30 const DEFAULT_CURSOR_LIMIT: u64 = 100; 29 31 const DEFAULT_CURSOR_LIMIT_MAX: u64 = 1000; ··· 353 355 .filter(|s| !s.is_empty()), 354 356 ); 355 357 356 - let Some((collection, path)) = query.source.split_once(':') else { 357 - return Err(http::StatusCode::BAD_REQUEST); 358 - }; 359 - let path = format!(".{path}"); 358 + let (collection, path) = 359 + parse_link_source(&query.source).map_err(|_| http::StatusCode::BAD_REQUEST)?; // TODO: better response errors! 360 360 361 - let path_to_other = format!(".{}", query.path_to_other); 361 + let path_to_other = 362 + parse_path(&query.path_to_other).map_err(|_| http::StatusCode::BAD_REQUEST)?; // TODO: better response errors! 362 363 363 364 let paged = store 364 365 .get_many_to_many_counts( 365 366 &query.subject, 366 - collection, 367 + &collection, 367 368 &path, 368 369 &path_to_other, 369 370 limit, ··· 442 443 query: axum_extra::extract::Query<GetItemsCountQuery>, 443 444 store: impl LinkReader, 444 445 ) -> Result<impl IntoResponse, http::StatusCode> { 445 - let Some((collection, path)) = query.source.split_once(':') else { 446 - return Err(http::StatusCode::BAD_REQUEST); 447 - }; 448 - let path = format!(".{path}"); 446 + let (collection, path) = 447 + parse_link_source(&query.source).map_err(|_| http::StatusCode::BAD_REQUEST)?; // TODO: better response errors! 
449 448 let total = store 450 - .get_count(&query.subject, collection, &path) 449 + .get_count(&query.subject, &collection, &path) 451 450 .map_err(|_| http::StatusCode::INTERNAL_SERVER_ERROR)?; 452 451 453 452 Ok(acceptable( ··· 551 550 .map(|d| Did(d.to_string())), 552 551 ); 553 552 554 - let Some((collection, path)) = query.source.split_once(':') else { 555 - return Err(http::StatusCode::BAD_REQUEST); 556 - }; 557 - let path = format!(".{path}"); 553 + let (collection, path) = 554 + parse_link_source(&query.source).map_err(|_| http::StatusCode::BAD_REQUEST)?; // TODO: better response errors! 558 555 559 556 let order = if query.reverse { 560 557 Order::OldestToNewest ··· 565 562 let paged = store 566 563 .get_links( 567 564 &query.subject, 568 - collection, 565 + &collection, 569 566 &path, 570 567 order, 571 568 limit, ··· 755 752 .filter(|s| !s.is_empty()), 756 753 ); 757 754 758 - let Some((collection, path)) = query.source.split_once(':') else { 759 - return Err(http::StatusCode::BAD_REQUEST); 760 - }; 761 - let path = format!(".{path}"); 755 + let (collection, path) = 756 + parse_link_source(&query.source).map_err(|_| http::StatusCode::BAD_REQUEST)?; // TODO: better response errors! 762 757 763 - let path_to_other = format!(".{}", query.path_to_other); 758 + let path_to_other = 759 + parse_path(&query.path_to_other).map_err(|_| http::StatusCode::BAD_REQUEST)?; // TODO: better response errors! 764 760 765 761 let paged = store 766 762 .get_many_to_many( 767 763 &query.subject, 768 - collection, 764 + &collection, 769 765 &path, 770 766 &path_to_other, 771 767 limit, ··· 824 820 return Err(http::StatusCode::BAD_REQUEST); 825 821 } 826 822 827 - let Some((collection, path)) = query.source.split_once(':') else { 828 - return Err(http::StatusCode::BAD_REQUEST); 829 - }; 830 - let path = format!(".{path}"); 823 + let (collection, path) = 824 + parse_link_source(&query.source).map_err(|_| http::StatusCode::BAD_REQUEST)?; // TODO: better response errors! 
831 825 832 826 let paged = store 833 - .get_distinct_dids(&query.subject, collection, &path, limit, until) 827 + .get_distinct_dids(&query.subject, &collection, &path, limit, until) 834 828 .map_err(|_| http::StatusCode::INTERNAL_SERVER_ERROR)?; 835 829 836 830 let cursor = paged.next.map(|next| {
+127
constellation/src/storage/mod.rs
··· 1499 1499 assert_stats(storage.get_stats()?, 1..=1, 2..=2, 1..=1); 1500 1500 }); 1501 1501 1502 + //////// rkey-indexed (path = ".") ///////// 1503 + 1504 + test_each_storage!(rkey_indexed_basic, |storage| { 1505 + storage.push( 1506 + &ActionableEvent::CreateLinks { 1507 + record_id: RecordId { 1508 + did: "did:plc:voucher".into(), 1509 + collection: "sh.tangled.graph.vouch".into(), 1510 + rkey: "did:plc:vouchedfor".into(), 1511 + }, 1512 + links: vec![CollectedLink { 1513 + target: Link::Did("did:plc:vouchedfor".into()), 1514 + path: ".".into(), 1515 + }], 1516 + }, 1517 + 0, 1518 + )?; 1519 + 1520 + assert_eq!( 1521 + storage.get_count("did:plc:vouchedfor", "sh.tangled.graph.vouch", ".")?, 1522 + 1 1523 + ); 1524 + assert_eq!( 1525 + storage.get_distinct_did_count("did:plc:vouchedfor", "sh.tangled.graph.vouch", ".")?, 1526 + 1 1527 + ); 1528 + assert_eq!( 1529 + storage.get_links( 1530 + "did:plc:vouchedfor", 1531 + "sh.tangled.graph.vouch", 1532 + ".", 1533 + Order::NewestToOldest, 1534 + 100, 1535 + None, 1536 + &HashSet::default(), 1537 + )?, 1538 + PagedAppendingCollection { 1539 + version: (1, 0), 1540 + items: vec![RecordId { 1541 + did: "did:plc:voucher".into(), 1542 + collection: "sh.tangled.graph.vouch".into(), 1543 + rkey: "did:plc:vouchedfor".into(), 1544 + }], 1545 + next: None, 1546 + total: 1, 1547 + } 1548 + ); 1549 + assert_stats(storage.get_stats()?, 1..=1, 1..=1, 1..=1); 1550 + 1551 + storage.push( 1552 + &ActionableEvent::DeleteRecord(RecordId { 1553 + did: "did:plc:voucher".into(), 1554 + collection: "sh.tangled.graph.vouch".into(), 1555 + rkey: "did:plc:vouchedfor".into(), 1556 + }), 1557 + 0, 1558 + )?; 1559 + assert_eq!( 1560 + storage.get_count("did:plc:vouchedfor", "sh.tangled.graph.vouch", ".")?, 1561 + 0 1562 + ); 1563 + }); 1564 + 1565 + test_each_storage!(rkey_link_and_record_link_coexist, |storage| { 1566 + storage.push( 1567 + &ActionableEvent::CreateLinks { 1568 + record_id: RecordId { 1569 + did: "did:plc:voucher".into(), 
1570 + collection: "sh.tangled.graph.vouch".into(), 1571 + rkey: "did:plc:vouchedfor".into(), 1572 + }, 1573 + links: vec![ 1574 + CollectedLink { 1575 + target: Link::Did("did:plc:vouchedfor".into()), 1576 + path: ".".into(), 1577 + }, 1578 + CollectedLink { 1579 + target: Link::Uri("https://atproto.com".into()), 1580 + path: ".reason".into(), 1581 + }, 1582 + ], 1583 + }, 1584 + 0, 1585 + )?; 1586 + 1587 + assert_eq!( 1588 + storage.get_count("did:plc:vouchedfor", "sh.tangled.graph.vouch", ".")?, 1589 + 1 1590 + ); 1591 + assert_eq!( 1592 + storage.get_count("https://atproto.com", "sh.tangled.graph.vouch", ".reason")?, 1593 + 1 1594 + ); 1595 + 1596 + assert_eq!(storage.get_all_record_counts("did:plc:vouchedfor")?, { 1597 + let mut counts = HashMap::new(); 1598 + let mut by_path = HashMap::new(); 1599 + by_path.insert(".".into(), 1); 1600 + counts.insert("sh.tangled.graph.vouch".into(), by_path); 1601 + counts 1602 + }); 1603 + assert_eq!(storage.get_all_record_counts("https://atproto.com")?, { 1604 + let mut counts = HashMap::new(); 1605 + let mut by_path = HashMap::new(); 1606 + by_path.insert(".reason".into(), 1); 1607 + counts.insert("sh.tangled.graph.vouch".into(), by_path); 1608 + counts 1609 + }); 1610 + 1611 + storage.push( 1612 + &ActionableEvent::DeleteRecord(RecordId { 1613 + did: "did:plc:voucher".into(), 1614 + collection: "sh.tangled.graph.vouch".into(), 1615 + rkey: "did:plc:vouchedfor".into(), 1616 + }), 1617 + 0, 1618 + )?; 1619 + assert_eq!( 1620 + storage.get_count("did:plc:vouchedfor", "sh.tangled.graph.vouch", ".")?, 1621 + 0 1622 + ); 1623 + assert_eq!( 1624 + storage.get_count("https://atproto.com", "sh.tangled.graph.vouch", ".reason")?, 1625 + 0 1626 + ); 1627 + }); 1628 + 1502 1629 //////// many-to-many ///////// 1503 1630 1504 1631 test_each_storage!(get_m2m_counts_empty, |storage| {
+42 -118
constellation/src/storage/rocks_store.rs
··· 7 7 use anyhow::{anyhow, bail, Result}; 8 8 use bincode::Options as BincodeOptions; 9 9 use links::CollectedLink; 10 - use metrics::{counter, describe_counter, describe_histogram, histogram, Unit}; 10 + use metrics::{counter, histogram}; 11 11 use ratelimit::Ratelimiter; 12 12 use rocksdb::backup::{BackupEngine, BackupEngineOptions}; 13 13 use rocksdb::{ ··· 256 256 257 257 impl RocksStorage { 258 258 pub fn new(path: impl AsRef<Path>) -> Result<Self> { 259 - Self::describe_metrics(); 260 259 let me = RocksStorage::open_readmode(path, false)?; 261 260 me.global_init()?; 262 261 Ok(me) ··· 308 307 } 309 308 310 309 fn global_init(&self) -> Result<()> { 311 - let first_run = self.db.get(JETSTREAM_CURSOR_KEY)?.is_some(); 312 - if first_run { 310 + if self.db.get(STARTED_AT_KEY)?.is_none() { 313 311 self.db.put(STARTED_AT_KEY, _rv(now()))?; 314 - 315 - // hack / temporary: if we're a new db, put in a completed repair 316 - // state so we don't run repairs (repairs are for old-code dbs) 317 - let completed = TargetIdRepairState { 318 - id_when_started: 0, 319 - current_us_started_at: 0, 320 - latest_repaired_i: 0, 321 - }; 322 - self.db.put(TARGET_ID_REPAIR_STATE_KEY, _rv(completed))?; 323 312 } 324 313 Ok(()) 325 314 } 326 315 327 - pub fn run_repair(&self, breather: Duration, stay_alive: CancellationToken) -> Result<bool> { 328 - let mut state = match self 329 - .db 330 - .get(TARGET_ID_REPAIR_STATE_KEY)? 331 - .map(|s| _vr(&s)) 332 - .transpose()? 
333 - { 334 - Some(s) => s, 335 - None => TargetIdRepairState { 336 - id_when_started: self.did_id_table.priv_id_seq, 337 - current_us_started_at: now(), 338 - latest_repaired_i: 0, 339 - }, 340 - }; 341 - 342 - eprintln!("initial repair state: {state:?}"); 343 - 344 - let cf = self.db.cf_handle(TARGET_IDS_CF).unwrap(); 345 - 346 - let mut iter = self.db.raw_iterator_cf(&cf); 347 - iter.seek_to_first(); 348 - 349 - eprintln!("repair iterator sent to first key"); 350 - 351 - // skip ahead if we're done some, or take a single first step 352 - for _ in 0..state.latest_repaired_i { 353 - iter.next(); 316 + pub fn reset_start(&self) -> Result<bool> { 317 + let existing = self.db.get(STARTED_AT_KEY)?; 318 + if existing.is_none() { 319 + bail!("not resetting started-at key because one wasn't set"); 354 320 } 355 - 356 - eprintln!( 357 - "repair iterator skipped to {}th key", 358 - state.latest_repaired_i 359 - ); 360 - 361 - let mut maybe_done = false; 362 - 363 - let mut write_fast = rocksdb::WriteOptions::default(); 364 - write_fast.set_sync(false); 365 - write_fast.disable_wal(true); 366 - 367 - while !stay_alive.is_cancelled() && !maybe_done { 368 - // let mut batch = WriteBatch::default(); 369 - 370 - let mut any_written = false; 371 - 372 - for _ in 0..1000 { 373 - if state.latest_repaired_i % 1_000_000 == 0 { 374 - eprintln!("target iter at {}", state.latest_repaired_i); 375 - } 376 - state.latest_repaired_i += 1; 377 - 378 - if !iter.valid() { 379 - eprintln!("invalid iter, are we done repairing?"); 380 - maybe_done = true; 381 - break; 382 - }; 383 - 384 - // eprintln!("iterator seems to be valid! 
getting the key..."); 385 - let raw_key = iter.key().unwrap(); 386 - if raw_key.len() == 8 { 387 - // eprintln!("found an 8-byte key, skipping it since it's probably an id..."); 388 - iter.next(); 389 - continue; 390 - } 391 - let target: TargetKey = _kr::<TargetKey>(raw_key)?; 392 - let target_id: TargetId = _vr(iter.value().unwrap())?; 393 - 394 - self.db 395 - .put_cf_opt(&cf, target_id.id().to_be_bytes(), _rv(&target), &write_fast)?; 396 - any_written = true; 397 - iter.next(); 398 - } 399 - 400 - if any_written { 401 - self.db 402 - .put(TARGET_ID_REPAIR_STATE_KEY, _rv(state.clone()))?; 403 - std::thread::sleep(breather); 404 - } 405 - } 406 - 407 - eprintln!("repair iterator done."); 408 - 409 - Ok(false) 321 + self.db.put(STARTED_AT_KEY, _rv(COZY_FIRST_CURSOR))?; 322 + println!("started-at key reset to {COZY_FIRST_CURSOR}"); 323 + Ok(true) 410 324 } 411 325 412 326 pub fn start_backup( ··· 503 417 BackupEngine::open(&BackupEngineOptions::new(path)?, &rocksdb::Env::new()?)?; 504 418 engine.purge_old_backups(num_backups_to_keep)?; 505 419 Ok(()) 506 - } 507 - 508 - fn describe_metrics() { 509 - describe_histogram!( 510 - "storage_rocksdb_read_seconds", 511 - Unit::Seconds, 512 - "duration of the read stage of actions" 513 - ); 514 - describe_histogram!( 515 - "storage_rocksdb_action_seconds", 516 - Unit::Seconds, 517 - "duration of read + write of actions" 518 - ); 519 - describe_counter!( 520 - "storage_rocksdb_batch_ops_total", 521 - Unit::Count, 522 - "total batched operations from actions" 523 - ); 524 - describe_histogram!( 525 - "storage_rocksdb_delete_account_ops", 526 - Unit::Count, 527 - "total batched ops for account deletions" 528 - ); 529 420 } 530 421 531 422 fn merge_op_extend_did_ids( ··· 827 718 impl Drop for RocksStorage { 828 719 fn drop(&mut self) { 829 720 if self.is_writer { 721 + // TODO: cloning a writer is possible and currently breaks things 722 + // (constellation code currently doesn't/shouldn't clone the writer) 830 723 
println!("rocksdb writer: cleaning up for shutdown..."); 831 724 if let Err(e) = self.db.flush_wal(true) { 832 725 eprintln!("rocks: flushing wal failed: {e:?}"); ··· 1795 1688 } 1796 1689 1797 1690 // TODO: add tests for key prefixes actually prefixing (bincode encoding _should_...) 1691 + 1692 + #[test] 1693 + fn rocks_started_at_persists_across_opens() -> Result<()> { 1694 + let dir = tempdir()?; 1695 + 1696 + let mut store = RocksStorage::new(dir.path())?; 1697 + store.push( 1698 + &ActionableEvent::CreateLinks { 1699 + record_id: RecordId { 1700 + did: "did:plc:asdf".into(), 1701 + collection: "a.b.c".into(), 1702 + rkey: "asdf".into(), 1703 + }, 1704 + links: vec![CollectedLink { 1705 + target: Link::Uri("e.com".into()), 1706 + path: ".uri".into(), 1707 + }], 1708 + }, 1709 + 0, 1710 + )?; 1711 + let first = store.get_stats()?.started_at; 1712 + drop(store); 1713 + 1714 + std::thread::sleep(Duration::from_millis(5)); 1715 + 1716 + let store = RocksStorage::new(dir.path())?; 1717 + let second = store.get_stats()?.started_at; 1718 + 1719 + assert_eq!(first, second, "STARTED_AT must not change across opens"); 1720 + Ok(()) 1721 + } 1798 1722 }
+7 -1
constellation/templates/hello.html.j2
··· 243 243 244 244 <p>A DID like <code>did:plc:hdhoaan3xa3jiuq4fg4mefid</code>, or an AT-URI like <code>at://did:plc:z72i7hdynmk6r22z27h6tvur/app.bsky.feed.post/3lgu4lg6j2k2v</code>, or a URI like <code>https://example.com</code>.</p> 245 245 246 + <h3>Source</h3> 247 + 248 + <p>A <em>link source</em>, made of a collection and path, like <code>app.bsky.feed.like:subject</code>. The <code>:</code> separates them.</p> 249 + 246 250 <h3>Collection</h3> 247 251 248 252 <p>A record NSID like <code>app.bsky.feed.like</code>.</p> 249 253 250 254 <h3>Path</h3> 251 255 252 - <p>A (currently-very-very-hacky) json-path-ish representation of the source of a link in a record. Records may contain multiple links with different meanings, so this specifies which specific link is of interest. Like <code>.subject.uri</code>.</p> 256 + <p>A (currently-hacky) json-path-ish representation of the source of a link in a record. Records may contain multiple links with different meanings, so this specifies which specific link is of interest. Like <code>.subject.uri</code>.</p> 257 + 258 + <p>A special path, <code>.</code>, represents <em>the record's <code>rkey</code></em>. Tangled's vouch system puts the vouch subject in the <code>rkey</code> instead of inside the actual record. Its link source looks like this: <code>sh.tangled.graph.vouch:.</code></p> 253 259 254 260 <h3>Cursor</h3> 255 261
+7 -1
jetstream/src/error.rs
··· 26 26 #[error("failed to construct url: {0}")] 27 27 InvalidEndpointUri(#[from] tokio_tungstenite::tungstenite::http::uri::InvalidUri), 28 28 #[error("failed to connect to Jetstream instance: {0}")] 29 - WebSocketFailure(#[from] tokio_tungstenite::tungstenite::Error), 29 + WebSocketFailure(Box<tokio_tungstenite::tungstenite::Error>), 30 30 #[error("the Jetstream config is invalid (this really should not happen here): {0}")] 31 31 InvalidConfig(#[from] ConfigValidationError), 32 + } 33 + 34 + impl From<tokio_tungstenite::tungstenite::Error> for ConnectionError { 35 + fn from(e: tokio_tungstenite::tungstenite::Error) -> Self { 36 + Self::WebSocketFailure(Box::new(e)) 37 + } 32 38 } 33 39 34 40 /// Possible errors that can occur when receiving events from a Jetstream instance over WebSockets.
-2
links/Cargo.toml
··· 4 4 edition = "2021" 5 5 6 6 [dependencies] 7 - anyhow = "1.0.95" 8 7 fluent-uri = "0.3.2" 9 - nom = "7.1.3" 10 8 thiserror = "2.0.9" 11 9 tinyjson = "2.5.1"
+44
links/src/did.rs
··· 7 7 // for now, just working through the rules laid out in the docs in order, 8 8 // without much regard for efficiency for now. 9 9 10 + // newer specs say max 2048 chars 11 + if s.len() > 2048 { 12 + return None; 13 + } 14 + 10 15 // The entire URI is made up of a subset of ASCII, containing letters (A-Z, a-z), 11 16 // digits (0-9), period, underscore, colon, percent sign, or hyphen (._:%-) 12 17 if !s ··· 68 73 #[cfg(test)] 69 74 mod tests { 70 75 use super::*; 76 + 77 + #[test] 78 + fn test_did_too_long() { 79 + let long = concat!( 80 + "did:long:zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 81 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 82 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 83 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 84 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 85 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 86 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 87 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 88 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 89 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 90 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 91 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 92 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 93 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 94 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 95 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 96 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 97 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 98 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 99 + 
"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 100 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 101 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 102 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 103 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 104 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 105 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 106 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 107 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 108 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 109 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 110 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 111 + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 112 + ); 113 + assert_eq!(parse_did(long), None); 114 + } 71 115 72 116 #[test] 73 117 fn test_did_parse() {
+10 -1
links/src/lib.rs
··· 120 120 assert_eq!( 121 121 parse_any_link("did:plc:44ybard66vv44zksje25o7dz"), 122 122 Some(Link::Did("did:plc:44ybard66vv44zksje25o7dz".into())) 123 - ) 123 + ); 124 + 125 + assert_eq!( 126 + parse_any_link("tel:5551234567"), 127 + Some(Link::Uri("tel:5551234567".into())), 128 + ); 129 + 130 + assert_eq!(parse_any_link("3jwdwj2ctlk26"), None); 131 + assert_eq!(parse_any_link("self"), None); 132 + assert_eq!(parse_any_link(""), None); 124 133 } 125 134 126 135 #[test]
+1 -1
spacedust/Cargo.toml
··· 16 16 log = "0.4.27" 17 17 metrics = "0.24.2" 18 18 metrics-exporter-prometheus = { version = "0.17.1", features = ["http-listener"] } 19 - rand = "0.9.1" 19 + rand = "0.9.3" 20 20 schemars = "0.8.22" 21 21 semver = "1.0.26" 22 22 serde = { version = "1.0.219", features = ["derive"] }
+5 -1
ufos/Cargo.toml
··· 8 8 async-trait = "0.1.88" 9 9 base64 = "0.22.1" 10 10 bincode = { version = "2.0.1", features = ["serde"] } 11 - cardinality-estimator-safe = { version = "4.0.2", features = ["with_serde", "with_digest"] } 11 + cardinality-estimator-safe = { version = "4.0.3", features = ["with_serde", "with_digest"] } 12 12 chrono = { version = "0.4.41", features = ["serde"] } 13 13 clap = { workspace = true } 14 14 dropshot = "0.16.0" ··· 33 33 34 34 [target.'cfg(not(target_env = "msvc"))'.dependencies] 35 35 tikv-jemallocator = "0.6.0" 36 + 37 + [[bin]] 38 + name = "analyze" 39 + path = "src/bin/analyze.rs" 36 40 37 41 [dev-dependencies] 38 42 tempfile = "3.19.1"
+170
ufos/src/bin/analyze.rs
··· 1 + use cardinality_estimator_safe::Sketch; 2 + use chrono::{DateTime, Utc}; 3 + use clap::{Parser, Subcommand}; 4 + use fjall::{Config, PartitionCreateOptions, PartitionHandle}; 5 + use std::collections::BTreeMap; 6 + use std::path::PathBuf; 7 + use ufos::db_types::{db_complete, DbBytes}; 8 + use ufos::store_types::{ 9 + AllTimeRollupKey, AllTimeRollupStaticPrefix, CountsValue, WeekTruncatedCursor, WeeklyRollupKey, 10 + WeeklyRollupStaticPrefix, WithCollection, 11 + }; 12 + 13 + #[derive(Parser)] 14 + #[command(about = "One-off data analysis of ufos rollup data")] 15 + struct Cli { 16 + /// Path to the fjall data directory 17 + data: PathBuf, 18 + #[command(subcommand)] 19 + command: Command, 20 + } 21 + 22 + #[derive(Subcommand)] 23 + enum Command { 24 + /// Total estimated distinct users across all time and every group 25 + TotalUsers, 26 + /// Weekly estimated distinct users (excluding app.bsky.*/chat.bsky.*) 27 + WeeklyUsers, 28 + /// Weekly count of groups with >10 estimated distinct users (excluding app.bsky.*/chat.bsky.*) 29 + WeeklyGroups, 30 + /// Like weekly-groups but with the last NSID segment removed 31 + WeeklyParents, 32 + } 33 + 34 + fn week_label(week: WeekTruncatedCursor) -> String { 35 + let us = week.to_raw_u64(); 36 + let secs = (us / 1_000_000) as i64; 37 + let dt = DateTime::<Utc>::from_timestamp(secs, 0).unwrap(); 38 + dt.format("%Y-%m-%d").to_string() 39 + } 40 + 41 + fn is_excluded(nsid: &str) -> bool { 42 + nsid.starts_with("app.bsky.") || nsid.starts_with("chat.bsky.") 43 + } 44 + 45 + fn parent_prefix(nsid: &str) -> &str { 46 + let Some((pre, _)) = nsid.rsplit_once('.') else { 47 + eprintln!("no segments in nsid? 
nsid={nsid}"); 48 + return nsid; 49 + }; 50 + pre 51 + } 52 + 53 + fn total_users(rollups: &PartitionHandle) -> anyhow::Result<()> { 54 + eprintln!("scanning all-time rollups..."); 55 + let mut global_sketch = Sketch::<14>::default(); 56 + let mut all_time_count = 0u64; 57 + let prefix_bytes = AllTimeRollupStaticPrefix::default().to_db_bytes()?; 58 + for kv in rollups.prefix(prefix_bytes) { 59 + let (key_bytes, val_bytes) = kv?; 60 + let _key = db_complete::<AllTimeRollupKey>(&key_bytes)?; 61 + let val = db_complete::<CountsValue>(&val_bytes)?; 62 + global_sketch.merge(val.dids()); 63 + all_time_count += 1; 64 + } 65 + println!("groups scanned: {all_time_count}"); 66 + println!("estimated distinct users: {}", global_sketch.estimate()); 67 + Ok(()) 68 + } 69 + 70 + /// Scan weekly rollups once, returning week -> (merged sketch, per-group entries) 71 + /// Only non-excluded groups are included. 72 + #[expect(clippy::type_complexity)] 73 + fn scan_weekly( 74 + rollups: &PartitionHandle, 75 + ) -> anyhow::Result<BTreeMap<u64, (Sketch<14>, Vec<(String, u64)>)>> { 76 + eprintln!("scanning weekly rollups..."); 77 + let mut weekly_data: BTreeMap<u64, (Sketch<14>, Vec<(String, u64)>)> = BTreeMap::new(); 78 + let prefix_bytes = WeeklyRollupStaticPrefix::default().to_db_bytes()?; 79 + let mut scanned = 0u64; 80 + for kv in rollups.prefix(prefix_bytes) { 81 + let (key_bytes, val_bytes) = kv?; 82 + let key = db_complete::<WeeklyRollupKey>(&key_bytes)?; 83 + let val = db_complete::<CountsValue>(&val_bytes)?; 84 + let week_us = key.cursor().to_raw_u64(); 85 + let nsid_str = key.collection().to_string(); 86 + let estimate = val.dids().estimate() as u64; 87 + 88 + let entry = weekly_data 89 + .entry(week_us) 90 + .or_insert_with(|| (Sketch::<14>::default(), Vec::new())); 91 + 92 + if !is_excluded(&nsid_str) { 93 + entry.0.merge(val.dids()); 94 + entry.1.push((nsid_str, estimate)); 95 + } 96 + 97 + scanned += 1; 98 + if scanned.is_multiple_of(500_000) { 99 + eprintln!(" ...scanned 
{scanned} weekly entries"); 100 + } 101 + } 102 + eprintln!(" total weekly entries scanned: {scanned}"); 103 + Ok(weekly_data) 104 + } 105 + 106 + fn weekly_users(rollups: &PartitionHandle) -> anyhow::Result<()> { 107 + let weekly_data = scan_weekly(rollups)?; 108 + println!("week\test_users"); 109 + for (&week_us, (sketch, _)) in &weekly_data { 110 + let week = WeekTruncatedCursor::try_from_raw_u64(week_us)?; 111 + println!("{}\t{}", week_label(week), sketch.estimate()); 112 + } 113 + Ok(()) 114 + } 115 + 116 + fn weekly_groups(rollups: &PartitionHandle) -> anyhow::Result<()> { 117 + let weekly_data = scan_weekly(rollups)?; 118 + println!("week\tgroups"); 119 + for (&week_us, (_, entries)) in &weekly_data { 120 + let week = WeekTruncatedCursor::try_from_raw_u64(week_us)?; 121 + let count = entries.iter().filter(|(_, est)| *est > 10).count(); 122 + println!("{}\t{}", week_label(week), count); 123 + } 124 + Ok(()) 125 + } 126 + 127 + fn weekly_parents(rollups: &PartitionHandle) -> anyhow::Result<()> { 128 + let weekly_data = scan_weekly(rollups)?; 129 + println!("week\tparents\ttop parent prefixes"); 130 + for (&week_us, (_, entries)) in &weekly_data { 131 + let week = WeekTruncatedCursor::try_from_raw_u64(week_us)?; 132 + let mut parent_counts: BTreeMap<&str, usize> = BTreeMap::new(); 133 + for (nsid, est) in entries { 134 + if *est > 10 { 135 + let parent = parent_prefix(nsid); 136 + *parent_counts.entry(parent).or_default() += 1; 137 + } 138 + } 139 + let total_parents = parent_counts.len(); 140 + let mut sorted: Vec<_> = parent_counts.into_iter().collect(); 141 + sorted.sort_by_key(|c| std::cmp::Reverse(c.1)); 142 + let top: Vec<String> = sorted 143 + .iter() 144 + .take(5) 145 + .map(|(prefix, count)| format!("{prefix}({count})")) 146 + .collect(); 147 + println!( 148 + "{}\t{}\t{}", 149 + week_label(week), 150 + total_parents, 151 + top.join(", ") 152 + ); 153 + } 154 + Ok(()) 155 + } 156 + 157 + fn main() -> anyhow::Result<()> { 158 + let cli = Cli::parse(); 
159 + 160 + eprintln!("opening db at {:?}...", cli.data); 161 + let keyspace = Config::new(&cli.data).open()?; 162 + let rollups = keyspace.open_partition("rollups", PartitionCreateOptions::default())?; 163 + 164 + match cli.command { 165 + Command::TotalUsers => total_users(&rollups), 166 + Command::WeeklyUsers => weekly_users(&rollups), 167 + Command::WeeklyGroups => weekly_groups(&rollups), 168 + Command::WeeklyParents => weekly_parents(&rollups), 169 + } 170 + }
+3 -3
ufos/src/error.rs
··· 28 28 InitError(String), 29 29 #[error("DB seems to be in a bad state: {0}")] 30 30 BadStateError(String), 31 - #[error("Fjall error")] 31 + #[error("Fjall error: {0}")] 32 32 FjallError(#[from] fjall::Error), 33 - #[error("LSM-tree error (from fjall)")] 33 + #[error("LSM-tree error (from fjall): {0}")] 34 34 FjallLsmError(#[from] fjall::LsmError), 35 - #[error("Bytes encoding error")] 35 + #[error("Bytes encoding error: {0}")] 36 36 EncodingError(#[from] EncodingError), 37 37 #[error("If you ever see this, there's a bug in the code. The error was stolen")] 38 38 Stolen,
+8 -3
ufos/src/storage_fjall.rs
··· 1205 1205 1206 1206 let mut dirty_nsids = HashSet::new(); 1207 1207 1208 - #[derive(Eq, Hash, PartialEq)] 1208 + #[derive(Debug, Eq, Hash, PartialEq)] 1209 1209 enum Rollup { 1210 1210 Hourly(HourTruncatedCursor), 1211 1211 Weekly(WeekTruncatedCursor), ··· 1235 1235 dirty_nsids.insert(key.collection().clone()); 1236 1236 1237 1237 batch.remove(&self.rollups, key_bytes); 1238 - let val = db_complete::<CountsValue>(&val_bytes)?; 1238 + let val = db_complete::<CountsValue>(&val_bytes).inspect_err(|e| { 1239 + log::error!("bad CountsValue at {key:?} from rolling up timelies: {e}") 1240 + })?; 1239 1241 counts_by_rollup 1240 1242 .entry(( 1241 1243 key.collection().clone(), ··· 1275 1277 .get(&rollup_key_bytes)? 1276 1278 .as_deref() 1277 1279 .map(db_complete::<CountsValue>) 1278 - .transpose()? 1280 + .transpose() 1281 + .inspect_err(|e| { 1282 + log::error!("bad CountsValue at {nsid:?}/{rollup:?} from counts_by_rollup: {e}") 1283 + })? 1279 1284 .unwrap_or_default(); 1280 1285 1281 1286 // now that we have values, we can know the existing ranks