Monorepo for Tangled tangled.org
2

Configure Feed

Select the types of activity you want to include in your feed.

spindle,shuttle: microvm engine init

spindle,shuttle,nix: add an alpine microvm image
spindle/microvm: allow user defined binary caches in workflows
shuttle,nix/microvm: get rid of the hacky nix config parsing / rendering, use nix directly so we can access module options
spindle/engine: generalize scheduler out of microvm, make it work-conserving with aging and per-user fairness
spindle/microvm: add resource budget limits and optional cgroup enforcement

Signed-off-by: dawn <dawn@tangled.org>

author
dawn
committer
Tangled
date (Jun 18, 2026, 2:57 PM +0300) commit afd329ee parent 091d599e change-id wlpskvtz
+14027 -326
+2
.gitignore
··· 4 4 *.db-* 5 5 .bin/ 6 6 appview/pages/static/* 7 + spindle/spindle 8 + spindle/spindle-microvm-run 7 9 result 8 10 !.gitkeep 9 11 !appview/pages/static/topbar-search.js
+297 -1
Cargo.lock
··· 117 117 ] 118 118 119 119 [[package]] 120 + name = "antlr4rust" 121 + version = "0.5.2" 122 + source = "registry+https://github.com/rust-lang/crates.io-index" 123 + checksum = "093d520274bfff7278d776f7ea12981a0a0a6f96db90964658e0f38fc6e9a6a6" 124 + dependencies = [ 125 + "better_any", 126 + "bit-set", 127 + "byteorder", 128 + "lazy_static", 129 + "murmur3", 130 + "once_cell", 131 + "parking_lot", 132 + "typed-arena", 133 + "uuid", 134 + ] 135 + 136 + [[package]] 120 137 name = "anyhow" 121 138 version = "1.0.102" 122 139 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 270 287 version = "1.8.3" 271 288 source = "registry+https://github.com/rust-lang/crates.io-index" 272 289 checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" 290 + 291 + [[package]] 292 + name = "better_any" 293 + version = "0.2.1" 294 + source = "registry+https://github.com/rust-lang/crates.io-index" 295 + checksum = "4372b9543397a4b86050cc5e7ee36953edf4bac9518e8a774c2da694977fb6e4" 296 + 297 + [[package]] 298 + name = "bit-set" 299 + version = "0.8.0" 300 + source = "registry+https://github.com/rust-lang/crates.io-index" 301 + checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" 302 + dependencies = [ 303 + "bit-vec", 304 + ] 305 + 306 + [[package]] 307 + name = "bit-vec" 308 + version = "0.8.0" 309 + source = "registry+https://github.com/rust-lang/crates.io-index" 310 + checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" 273 311 274 312 [[package]] 275 313 name = "bitflags" ··· 658 696 ] 659 697 660 698 [[package]] 699 + name = "cel" 700 + version = "0.12.0" 701 + source = "registry+https://github.com/rust-lang/crates.io-index" 702 + checksum = "ca1e5eda1b0f8476181bed1bfc9232a91d62ff0b9f1bc0e48afff3cbcb5b0b5c" 703 + dependencies = [ 704 + "antlr4rust", 705 + "base64", 706 + "chrono", 707 + "lazy_static", 708 + "nom", 709 + "paste", 710 + "regex", 711 + "serde", 712 + "serde_json", 713 + "thiserror 
1.0.69", 714 + ] 715 + 716 + [[package]] 661 717 name = "census" 662 718 version = "0.4.2" 663 719 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1281 1337 version = "0.1.9" 1282 1338 source = "registry+https://github.com/rust-lang/crates.io-index" 1283 1339 checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" 1340 + 1341 + [[package]] 1342 + name = "fixedbitset" 1343 + version = "0.5.7" 1344 + source = "registry+https://github.com/rust-lang/crates.io-index" 1345 + checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" 1284 1346 1285 1347 [[package]] 1286 1348 name = "flate2" ··· 2263 2325 ] 2264 2326 2265 2327 [[package]] 2328 + name = "memoffset" 2329 + version = "0.9.1" 2330 + source = "registry+https://github.com/rust-lang/crates.io-index" 2331 + checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" 2332 + dependencies = [ 2333 + "autocfg", 2334 + ] 2335 + 2336 + [[package]] 2266 2337 name = "miette" 2267 2338 version = "7.6.0" 2268 2339 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2340 2411 ] 2341 2412 2342 2413 [[package]] 2414 + name = "multimap" 2415 + version = "0.10.1" 2416 + source = "registry+https://github.com/rust-lang/crates.io-index" 2417 + checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" 2418 + 2419 + [[package]] 2420 + name = "murmur3" 2421 + version = "0.4.1" 2422 + source = "registry+https://github.com/rust-lang/crates.io-index" 2423 + checksum = "a198f9589efc03f544388dfc4a19fe8af4323662b62f598b8dcfdac62c14771c" 2424 + dependencies = [ 2425 + "byteorder", 2426 + ] 2427 + 2428 + [[package]] 2343 2429 name = "murmurhash32" 2344 2430 version = "0.3.1" 2345 2431 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2373 2459 ] 2374 2460 2375 2461 [[package]] 2462 + name = "nix" 2463 + version = "0.31.3" 2464 + source = "registry+https://github.com/rust-lang/crates.io-index" 2465 + checksum 
= "cf20d2fde8ff38632c426f1165ed7436270b44f199fc55284c38276f9db47c3d" 2466 + dependencies = [ 2467 + "bitflags", 2468 + "cfg-if", 2469 + "cfg_aliases", 2470 + "libc", 2471 + "memoffset", 2472 + ] 2473 + 2474 + [[package]] 2376 2475 name = "nom" 2377 2476 version = "7.1.3" 2378 2477 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2445 2544 version = "0.2.1" 2446 2545 source = "registry+https://github.com/rust-lang/crates.io-index" 2447 2546 checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" 2547 + 2548 + [[package]] 2549 + name = "ordered-float" 2550 + version = "2.10.1" 2551 + source = "registry+https://github.com/rust-lang/crates.io-index" 2552 + checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" 2553 + dependencies = [ 2554 + "num-traits", 2555 + ] 2448 2556 2449 2557 [[package]] 2450 2558 name = "ordered-float" ··· 2570 2678 checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" 2571 2679 2572 2680 [[package]] 2681 + name = "petgraph" 2682 + version = "0.8.3" 2683 + source = "registry+https://github.com/rust-lang/crates.io-index" 2684 + checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" 2685 + dependencies = [ 2686 + "fixedbitset", 2687 + "hashbrown 0.15.5", 2688 + "indexmap", 2689 + ] 2690 + 2691 + [[package]] 2573 2692 name = "phf" 2574 2693 version = "0.11.3" 2575 2694 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2738 2857 ] 2739 2858 2740 2859 [[package]] 2860 + name = "prost" 2861 + version = "0.14.3" 2862 + source = "registry+https://github.com/rust-lang/crates.io-index" 2863 + checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" 2864 + dependencies = [ 2865 + "bytes", 2866 + "prost-derive", 2867 + ] 2868 + 2869 + [[package]] 2870 + name = "prost-build" 2871 + version = "0.14.3" 2872 + source = "registry+https://github.com/rust-lang/crates.io-index" 2873 + checksum = 
"343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" 2874 + dependencies = [ 2875 + "heck 0.5.0", 2876 + "itertools 0.14.0", 2877 + "log", 2878 + "multimap", 2879 + "petgraph", 2880 + "prettyplease", 2881 + "prost", 2882 + "prost-types", 2883 + "regex", 2884 + "syn", 2885 + "tempfile", 2886 + ] 2887 + 2888 + [[package]] 2889 + name = "prost-derive" 2890 + version = "0.14.3" 2891 + source = "registry+https://github.com/rust-lang/crates.io-index" 2892 + checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" 2893 + dependencies = [ 2894 + "anyhow", 2895 + "itertools 0.14.0", 2896 + "proc-macro2", 2897 + "quote", 2898 + "syn", 2899 + ] 2900 + 2901 + [[package]] 2902 + name = "prost-protovalidate" 2903 + version = "0.3.0" 2904 + source = "registry+https://github.com/rust-lang/crates.io-index" 2905 + checksum = "c3807965edce2730437ec19f99eae81064d152421c4c875a3e8f9e80cf2dd234" 2906 + dependencies = [ 2907 + "cel", 2908 + "chrono", 2909 + "fluent-uri", 2910 + "prost", 2911 + "prost-protovalidate-types", 2912 + "prost-reflect", 2913 + "prost-types", 2914 + "regex", 2915 + "serde_json", 2916 + "thiserror 2.0.18", 2917 + ] 2918 + 2919 + [[package]] 2920 + name = "prost-protovalidate-types" 2921 + version = "0.3.0" 2922 + source = "registry+https://github.com/rust-lang/crates.io-index" 2923 + checksum = "ddb5c8151aa3da0ea9ba5d91b170c7763b6c7c7352c3643d57b2b28de29afde9" 2924 + dependencies = [ 2925 + "prost", 2926 + "prost-build", 2927 + "prost-reflect", 2928 + "prost-reflect-build", 2929 + "prost-types", 2930 + "thiserror 2.0.18", 2931 + ] 2932 + 2933 + [[package]] 2934 + name = "prost-reflect" 2935 + version = "0.16.4" 2936 + source = "registry+https://github.com/rust-lang/crates.io-index" 2937 + checksum = "590aa145fee8f7a26b5a6055365e7c5e89a5c1caae9869de76ec0ee73181a2f9" 2938 + dependencies = [ 2939 + "base64", 2940 + "prost", 2941 + "prost-reflect-derive", 2942 + "prost-types", 2943 + "serde", 2944 + "serde-value", 2945 + ] 2946 + 2947 
+ [[package]] 2948 + name = "prost-reflect-build" 2949 + version = "0.16.0" 2950 + source = "registry+https://github.com/rust-lang/crates.io-index" 2951 + checksum = "8214ae2c30bbac390db0134d08300e770ef89b6d4e5abf855e8d300eded87e28" 2952 + dependencies = [ 2953 + "prost-build", 2954 + "prost-reflect", 2955 + ] 2956 + 2957 + [[package]] 2958 + name = "prost-reflect-derive" 2959 + version = "0.16.0" 2960 + source = "registry+https://github.com/rust-lang/crates.io-index" 2961 + checksum = "7b6d90e29fa6c0d13c2c19ba5e4b3fb0efbf5975d27bcf4e260b7b15455bcabe" 2962 + dependencies = [ 2963 + "proc-macro2", 2964 + "quote", 2965 + "syn", 2966 + ] 2967 + 2968 + [[package]] 2969 + name = "prost-types" 2970 + version = "0.14.3" 2971 + source = "registry+https://github.com/rust-lang/crates.io-index" 2972 + checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7" 2973 + dependencies = [ 2974 + "prost", 2975 + ] 2976 + 2977 + [[package]] 2741 2978 name = "quick_cache" 2742 2979 version = "0.6.22" 2743 2980 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3248 3485 ] 3249 3486 3250 3487 [[package]] 3488 + name = "serde-value" 3489 + version = "0.7.0" 3490 + source = "registry+https://github.com/rust-lang/crates.io-index" 3491 + checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c" 3492 + dependencies = [ 3493 + "ordered-float 2.10.1", 3494 + "serde", 3495 + ] 3496 + 3497 + [[package]] 3251 3498 name = "serde_bytes" 3252 3499 version = "0.11.19" 3253 3500 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3423 3670 checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" 3424 3671 3425 3672 [[package]] 3673 + name = "shuttle" 3674 + version = "0.1.0" 3675 + dependencies = [ 3676 + "anyhow", 3677 + "base64", 3678 + "nix", 3679 + "once_cell", 3680 + "prost", 3681 + "prost-protovalidate", 3682 + "prost-reflect", 3683 + "serde", 3684 + "serde_json", 3685 + "tempfile", 3686 + "tokio", 
3687 + "tokio-vsock", 3688 + "tracing", 3689 + "tracing-subscriber", 3690 + ] 3691 + 3692 + [[package]] 3426 3693 name = "signal-hook-registry" 3427 3694 version = "1.4.8" 3428 3695 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3706 3973 dependencies = [ 3707 3974 "fnv", 3708 3975 "nom", 3709 - "ordered-float", 3976 + "ordered-float 5.3.0", 3710 3977 "serde", 3711 3978 "serde_json", 3712 3979 ] ··· 4017 4284 ] 4018 4285 4019 4286 [[package]] 4287 + name = "tokio-vsock" 4288 + version = "0.7.2" 4289 + source = "registry+https://github.com/rust-lang/crates.io-index" 4290 + checksum = "8b319ef9394889dab2e1b4f0085b45ba11d0c79dc9d1a9d1afc057d009d0f1c7" 4291 + dependencies = [ 4292 + "bytes", 4293 + "futures", 4294 + "libc", 4295 + "tokio", 4296 + "vsock", 4297 + ] 4298 + 4299 + [[package]] 4020 4300 name = "toml" 4021 4301 version = "0.9.12+spec-1.1.0" 4022 4302 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 4238 4518 ] 4239 4519 4240 4520 [[package]] 4521 + name = "typed-arena" 4522 + version = "2.0.2" 4523 + source = "registry+https://github.com/rust-lang/crates.io-index" 4524 + checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" 4525 + 4526 + [[package]] 4241 4527 name = "typeid" 4242 4528 version = "1.0.3" 4243 4529 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 4369 4655 version = "0.9.5" 4370 4656 source = "registry+https://github.com/rust-lang/crates.io-index" 4371 4657 checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" 4658 + 4659 + [[package]] 4660 + name = "vsock" 4661 + version = "0.5.4" 4662 + source = "registry+https://github.com/rust-lang/crates.io-index" 4663 + checksum = "6ba782755fc073877e567c2253c0be48e4aa9a254c232d36d3985dfae0bd5205" 4664 + dependencies = [ 4665 + "libc", 4666 + "nix", 4667 + ] 4372 4668 4373 4669 [[package]] 4374 4670 name = "walkdir"
+2 -1
Cargo.toml
··· 1 1 [workspace] 2 2 resolver = "2" 3 - members = ["bobbin/crates/*"] 3 + members = ["bobbin/crates/*", "shuttle"] 4 4 exclude = ["sites"] 5 5 6 6 [workspace.package] ··· 14 14 "oppiliappan <me@oppi.li>", 15 15 "Anirudh Oppiliappan <anirudh@tangled.org>", 16 16 "eti <eti@eti.tf>", 17 + "dawn <dawn@tangled.org>", 17 18 ] 18 19 19 20 [workspace.lints.rust]
+8 -5
appview/pipelines/logs.go
··· 2 2 3 3 import ( 4 4 "html/template" 5 + "path" 5 6 "regexp" 6 7 "strings" 7 8 8 9 terminal "github.com/buildkite/terminal-to-html/v3" 9 10 "github.com/gorilla/websocket" 10 11 "tangled.org/core/appview/pages/markup" 12 + "tangled.org/core/hostutil" 11 13 ) 12 14 13 15 // matches any ANSI escape sequence: ESC [ <params> m ··· 79 81 } 80 82 } 81 83 82 - func SpindleURL(dev bool, spindle, knot, rkey, workflow string) string { 83 - scheme := "wss" 84 - if dev { 85 - scheme = "ws" 84 + func SpindleURL(spindle, knot, rkey, workflow string) string { 85 + url, err := hostutil.EnsureWsScheme(spindle) 86 + if err != nil { 87 + return "" 86 88 } 87 - return scheme + "://" + strings.Join([]string{spindle, "logs", knot, rkey, workflow}, "/") 89 + 90 + return url + path.Join("/logs", knot, rkey, workflow) 88 91 }
+14 -3
appview/pipelines/pipelines.go
··· 18 18 "tangled.org/core/appview/pages" 19 19 "tangled.org/core/appview/reporesolver" 20 20 "tangled.org/core/eventconsumer" 21 + "tangled.org/core/hostutil" 21 22 "tangled.org/core/idresolver" 22 23 "tangled.org/core/orm" 23 24 "tangled.org/core/rbac" ··· 222 223 return 223 224 } 224 225 225 - url := SpindleURL(p.config.Core.Dev, spindle, knot, rkey, workflow) 226 + url := SpindleURL(spindle, knot, rkey, workflow) 227 + if url == "" { 228 + http.Error(w, "invalid spindle hostname", http.StatusBadRequest) 229 + return 230 + } 226 231 l = l.With("url", url) 227 232 228 233 clientConn, err := upgrader.Upgrade(w, r, nil) ··· 421 426 return 422 427 } 423 428 429 + hostname, noTLS, err := hostutil.ParseHostname(spindle) 430 + if err != nil { 431 + http.Error(w, "invalid spindle hostname", http.StatusBadRequest) 432 + return 433 + } 434 + 424 435 spindleClient, err := p.oauth.ServiceClient( 425 436 r, 426 - oauth.WithService(f.Spindle), 437 + oauth.WithService(hostname), 427 438 oauth.WithLxm(tangled.PipelineCancelPipelineNSID), 428 - oauth.WithDev(p.config.Core.Dev), 439 + oauth.WithDev(noTLS), 429 440 oauth.WithTimeout(time.Second*30), // workflow cleanup usually takes time 430 441 ) 431 442
+1 -1
appview/pipelines/ssh/tui.go
··· 113 113 if !ok || len(ws.Data) == 0 { 114 114 return logDoneMsg{workflow: workflow} 115 115 } 116 - url := pipelines.SpindleURL(m.server.config.Core.Dev, ws.Data[0].Spindle, m.pipeline.Knot, m.pipeline.Rkey, workflow) 116 + url := pipelines.SpindleURL(ws.Data[0].Spindle, m.pipeline.Knot, m.pipeline.Rkey, workflow) 117 117 conn, _, err := websocket.DefaultDialer.Dial(url, nil) 118 118 if err != nil { 119 119 return logDoneMsg{workflow: workflow, err: fmt.Errorf("connecting to spindle: %w", err)}
+1 -1
appview/state/knotstream.go
··· 56 56 57 57 return bootstrapStream( 58 58 ctx, "knotstream", ec.KindKnot, hosts, c.Redis.Addr, 59 - c.Knotstream, c.Core.Dev, 59 + c.Knotstream, 60 60 knotIngester(d, acl, enforcer, posthog, notifier, c.Core.Dev, c, cfClient), 61 61 ), nil 62 62 }
+1 -1
appview/state/spindlestream.go
··· 33 33 34 34 return bootstrapStream( 35 35 ctx, "spindlestream", ec.KindSpindle, hosts, c.Redis.Addr, 36 - c.Spindlestream, c.Core.Dev, 36 + c.Spindlestream, 37 37 spindleIngester(d, pn), 38 38 ), nil 39 39 }
+1 -2
appview/state/spindlestream_test.go
··· 55 55 }) 56 56 srv := httptest.NewServer(mux) 57 57 t.Cleanup(srv.Close) 58 - source := ec.Source{Kind: "test", Host: strings.TrimPrefix(srv.URL, "http://")} 58 + source := ec.Source{Kind: "test", Host: strings.TrimPrefix(srv.URL, "http://"), NoTLS: true} 59 59 60 60 appviewDB, err := db.Make(ctx, filepath.Join(t.TempDir(), "appview.db")) 61 61 if err != nil { ··· 72 72 QueueSize: 16, 73 73 ConnectionTimeout: 2 * time.Second, 74 74 CursorStore: &cursor.MemoryStore{}, 75 - URLFunc: ec.DefaultURL(true), 76 75 Logger: logger, 77 76 } 78 77 c := ec.NewConsumer(cfg)
-2
appview/state/streams.go
··· 17 17 hosts []string, 18 18 redisAddr string, 19 19 streamCfg config.ConsumerConfig, 20 - dev bool, 21 20 processFn ec.ProcessFunc, 22 21 ) *ec.Consumer { 23 22 logger := log.SubLogger(log.FromContext(ctx), name) ··· 41 40 WorkerCount: streamCfg.WorkerCount, 42 41 QueueSize: streamCfg.QueueSize, 43 42 Logger: logger, 44 - URLFunc: ec.DefaultURL(dev), 45 43 CursorStore: &cursorStore, 46 44 }) 47 45 }
+277
blog/posts/spindle-microvm.md
··· 1 + --- 2 + atroot: true 3 + template: 4 + slug: spindle-microvm 5 + title: How the microVM engine comes together 6 + subtitle: spindle has a microVM engine now! 7 + date: 2026-06-16 8 + image: https://assets.tangled.network/blog/seed.png 9 + authors: 10 + - name: dawn 11 + email: dawn@tangled.org 12 + handle: ptr.pet 13 + --- 14 + 15 + Since launching, [spindle](/ci) has run your CI inside Docker containers created 16 + with nixery. That's been mostly okay if you are doing simple things, but if you 17 + wanted to do anything more outside the box (maybe you wanted some services, or 18 + to build & test containers inside), or if you wanted to use Nix inside it (which 19 + is rough :P), it wouldn't meet your needs. That changes today! 20 + 21 + spindle gains a microVM engine. Each workflow gets its own little virtual 22 + machine. You get a full environment inside your workflows that you can do 23 + whatever you want with without any of the roughness of nixery containers. 24 + Alongside this, you also get the ability to configure *services* that a workflow 25 + will have (on the NixOS image), so that means you can easily have postgres, 26 + Docker, and so on that will be alive through the workflow. 27 + 28 + ## what's in a microVM 29 + 30 + A microVM is just a VM with most of the boring parts removed. There's no BIOS, 31 + no PCI bus to probe, no emulated graphics card, none of the slow legacy stuff a 32 + normal QEMU machine drags along for example. You get virtio devices and not much 33 + else, which means it boots very quickly and uses very little memory. Right now 34 + QEMU is the only runner we support, but the engine is written so that other 35 + runners (firecracker for example) can slot in later. 36 + 37 + Inside the guest there's a small piece of software we call the agent. 
Spindle 38 + never SSHes in or runs commands "from the outside"; instead the agent dials back 39 + to spindle over vsock the moment it boots, says hello, and from then on every 40 + step of your workflow is sent to it as a message. The agent runs the command as 41 + an unprivileged user, streams stdout and stderr back, and reports the exit code. 42 + The host side of this lives in 43 + [`spindle`](https://tangled.org/tangled.org/core/tree/master/spindle/engines/microvm/agent.go) 44 + and the guest side is a little Rust binary called 45 + [`shuttle`](https://tangled.org/tangled.org/core/tree/master/shuttle). 46 + (`shuttle` implements 47 + [`agentproto`](https://tangled.org/tangled.org/core/tree/master/spindle/) which 48 + is the protocol used by `spindle`. Technically speaking anyone could implement 49 + this and, assuming side effects hold, you could have your own agent!) 50 + 51 + ## two kinds of images 52 + 53 + There are two "flavours" of image you can boot, and they're aimed at fairly 54 + different people. 55 + 56 + The first is **NixOS images**. These are the interesting ones: because the whole 57 + guest is built with Nix, you can configure it from your workflow file directly. 58 + Things like `dependencies`, `services`, `virtualisation` (e.g. Docker), 59 + `registry` and `caches` are all written right there in the YAML, and the guest 60 + agent builds and activates that config before any of your steps run. If we've 61 + built that exact base plus config before, spindle can just hand the guest a 62 + store path to realize (fetching from whatever cache `spindle` has configured) 63 + instead of rebuilding it, so the second run is quick. 64 + 65 + The second is **non-NixOS images**, which today just means Alpine, but can be 66 + anything. You don't get the workflow-level NixOS config here (there's no NixOS 67 + to configure), but if Nix happens to exist inside the image, like it does in our 68 + Alpine one, it can still talk to the spindle Nix cache just fine. 
69 + 70 + ### example nixos workflow 71 + 72 + If you've used spindle before this will look familiar, it's the same manifest you 73 + already know, just with a few extra keys that the NixOS image understands. Here's 74 + a workflow that needs postgres to test against and Docker to build an image: 75 + 76 + ```yaml 77 + # .tangled/workflows/test.yaml 78 + engine: microvm 79 + 80 + when: 81 + - event: ["push", "pull_request"] 82 + branch: ["master"] 83 + 84 + image: nixos 85 + 86 + dependencies: 87 + - go 88 + - github:nixos/nixpkgs#hello 89 + 90 + registry: 91 + nixpkgs: github:nixos/nixpkgs/nixos-unstable 92 + 93 + caches: 94 + https://nix-community.cachix.org: "nix-community.cachix.org-1:mB9FSh9qf2dCimDSUo8Zy7bkq5CX+/rkCWyvRCYg3Fs=" 95 + 96 + services: 97 + postgresql: 98 + enable: true 99 + ensureDatabases: ["spindle-workflow"] 100 + ensureUsers: 101 + - name: spindle-workflow 102 + ensureDBOwnership: true 103 + 104 + virtualisation: 105 + docker: true 106 + 107 + steps: 108 + - name: run tests 109 + environment: 110 + PGHOST: /run/postgresql 111 + command: | 112 + docker build -t app . 113 + psql -c "select 1" 114 + go test ./... 115 + ``` 116 + 117 + `dependencies` are packages that are added to `environment.systemPackages` (so, 118 + `PATH`). A bare name like `go` is looked up in nixpkgs (same as regular 119 + spindle), but you can also point at any flake with the `flakeref#attr` syntax, 120 + so `github:nixos/nixpkgs#hello` pulls `hello` straight out of that flake. 121 + `registry` is how you remap the global refs: here we pin `nixpkgs` to 122 + `nixos-unstable`, so now the bare `go` above resolves from unstable. You can 123 + alias your own flakes the same way (`myflake: github:me/x`, then `myflake#tool` 124 + in `dependencies`). 
`caches` is a map of binary cache URL to its trusted public 125 + key, and they get wired into the read proxy (more on that later), so the guest 126 + can substitute prebuilt paths from them instead of building everything from 127 + scratch. 128 + 129 + `services` and `virtualisation` are the interesting parts: they're passed 130 + straight through to NixOS, so anything you could write in a NixOS config you can 131 + write here. `services.postgresql.enable` brings postgres up before any of your 132 + steps run. Since steps run as the `spindle-workflow` user, naming a database 133 + after that user with `ensureDBOwnership` is the easy path to a working db - 134 + postgres peer auth maps the unix user straight to the matching role, so `psql` 135 + connects over the socket with no password and no extra setup (this name-matching 136 + is a NixOS requirement for `ensureDBOwnership`, if you want a differently named 137 + db you'd grant access yourself). `virtualisation.docker: true` is shorthand for 138 + `virtualisation.docker.enable = true`, which gets you a real Docker daemon 139 + inside the VM. By the time your first step runs, postgres is listening and the 140 + Docker socket is there, no sidecar dance, it's just part of the machine. 141 + 142 + (`true` works as shorthand for `.enable = true` anywhere an `enable` option 143 + exists, so most "just turn this on" services are a one-liner!) 144 + 145 + ## building the images 146 + 147 + Image builds are done with Nix. For NixOS we lean on 148 + [microvm.nix](https://github.com/microvm-nix/microvm.nix) and layer our own bits 149 + on top (stripping down kernel modules, configuring users, etc.). For Alpine 150 + there's a smallish Nix definition that fetches the kernel, the initrd and the 151 + kernel modules, sets up an init script that configures the machine on boot, 152 + copies in the dependencies we want (`nix`, `git`, etc.) and compresses the whole 153 + rootfs into a squashfs. 
154 + 155 + None of this *has* to be Nix, though. As far as spindle is concerned an image is 156 + valid as long as a few things hold: a guest agent (that implements `agentproto`) 157 + is present and gets started on boot, a `spindle-workflow` user exists, and the 158 + work directory is set up at `/workspace`. That can be built however you like. 159 + 160 + ## finding an image 161 + 162 + Every built image ships a `spec.json` next to its artifacts. The spec is the 163 + whole contract: where the kernel and initrd and read-only store disk live, the 164 + boot args, how much memory and how many vCPUs to give it, the shell to run steps 165 + in, the writable volumes, the network interfaces, and the runner-specific knobs 166 + (machine type, CPU, extra QEMU args). NixOS images also carry a `baseConfigHash` 167 + identifying the base config baked in (this is the hash of 168 + `nixosSystem.config.system.build.toplevel.outPath`). 169 + 170 + A workflow picks an image with the `image` key at the top level. The name is 171 + matched literally against what's on disk, we look for a directory called 172 + `<name>` with a `spec.json` in it, then fall back to a flat `<name>.json`. The 173 + nice property here is that resolution depends *only* on the name and what's on 174 + disk, never on the host doing the resolving, so the same workflow resolves to 175 + the same image on every spindle. If an operator keeps multiple arches side by 176 + side they can name them `nixos-x86_64`, `alpine-aarch64` and so on (that suffix 177 + is just part of the name, it's not handled specially). If you want, for example, 178 + `nixos` to work, you can just symlink `nixos` to `nixos-x86_64`. 
179 + 180 + Right before launch we double-check the referenced files actually exist 181 + and that the host has the tools we need: `mkfs.ext4` for the volumes, the 182 + QEMU binary for the spec's arch, `/dev/kvm` and `/dev/vhost-vsock`, plus 183 + the `ip` / `mount` / `slirp4netns` / `unshare` toolchain if the image 184 + wants networking. 185 + 186 + ## the life of a workflow 187 + 188 + A workflow moves through a handful of stages: it gets parsed and its 189 + image resolved, it waits for a slot, it gets set up, its steps run, and 190 + then everything is torn down. 191 + 192 + The waiting bit matters a lot. Each image declares how much memory, how many 193 + vCPUs and how much disk it needs, and a workflow has to acquire a slot from a 194 + resource scheduler before anything boots. The scheduler is work-conserving with 195 + aging and per-user fairness, so one person submitting a hundred jobs won't 196 + starve everyone else, and slots don't sit idle if there's work that fits in the 197 + budget. 198 + 199 + Once a slot is acquired, we do the setup. Spindle allocates a random vsock CID 200 + for the guest and registers it with the agent hub. It creates the per-workflow 201 + work directory, starts the two cache proxies (more on those later), then creates 202 + the VM: writable volumes become sparse files formatted ext4, the store disk is 203 + attached read-only, and QEMU is started with `-sandbox on`, `-nodefaults`, no 204 + display, no monitor, etc. with serial / `virtio_console` output to a log file 205 + and a QMP socket for control. 206 + 207 + Then we wait for the machine. We poll QMP until QEMU says the guest is running, 208 + then wait for the agent's handshake to arrive over vsock from the CID we expect. 209 + The agent tells us its protocol and versions, and spindle sends back the job id, 210 + the trusted cache public keys and the cache proxy ports, NixOS config if already 211 + cached... 
From there steps run one at a time as `$shell -lc <command>`, as the 212 + unprivileged workflow user in `/workspace/repo`, with the right environment and 213 + any unlocked secrets. 214 + 215 + Timeouts are cooperative: we work out a deadline from the workflow timeout 216 + and ship it to the guest, with a little grace on our side so the guest 217 + gets a chance to report the timeout itself rather than us just yanking the 218 + machine out from under it. And if the VM crashes mid-step we tail the 219 + serial and QEMU logs into the step's stderr, because "guest agent 220 + connection lost: EOF" is a genuinely useless thing to read at 2am. 221 + 222 + Teardown is the same whether the workflow passed, failed or timed out: 223 + drain any pending Nix cache uploads, ask the agent to power off, wait for 224 + QEMU to exit (falling back to a QMP `system_powerdown`, and finally a 225 + kill if it's being stubborn), then close the proxies and remove the work 226 + directory. 227 + 228 + ## locking down the network 229 + 230 + A VM that can reach the host's local network is a VM that can reach things it 231 + has no business reaching. So QEMU doesn't run in the host's network namespace at 232 + all. We `unshare` into fresh user, net and mount namespaces first. Inside that 233 + namespace a small wrapper bind-mounts a resolv.conf pointing at the slirp DNS 234 + and installs blackhole routes for every special-use IP range (RFC 6890, so 235 + private networks, link-local, loopback, etc.) before it execs QEMU. 236 + `slirp4netns` then provides outbound connectivity for that namespace, with 237 + `--disable-host-loopback`, sandbox and seccomp all on. The guest itself sits 238 + behind a *second* layer of QEMU user-mode networking inside that namespace. All 239 + of this is done without needing any privileges! 
240 + 241 + ## budgets and cgroups 242 + 243 + On its own, the scheduler's budget is just bookkeeping: it tracks what it's handed 244 + out, and the runner (QEMU) ensures that a workflow only gets what it was granted. But 245 + optionally the whole thing (QEMU and slirp4netns both) gets placed in a 246 + per-workflow cgroup with memory, swap etc. limits, which is an extra 247 + enforcement layer on top. A nice side effect is that when the cgroup OOM-kills the VM, 248 + we can see that it was an OOM and report it as such, instead of surfacing it as 249 + a generic crash and leaving you guessing. 250 + 251 + The spindle itself also gets a cgroup, which means that in a host OOM situation, 252 + it should be the workflows that die first, not the spindle itself. 253 + 254 + ## the nix cache, both ways 255 + 256 + The two proxies I mentioned during setup are how the guest talks to spindle's 257 + Nix cache, and they run on the host so the guest never needs credentials or 258 + direct network access to do it. Like the agent, they also use vsock to 259 + communicate with the spindle. 260 + 261 + The read proxy fans out to the configured substituters plus any caches you 262 + listed in your workflow, so when the guest needs to realize a store path it asks 263 + the proxy and the proxy fetches it. The request is sent concurrently to the read 264 + caches, so the one that answers it first wins. 265 + 266 + The upload proxy goes the other way: any path built inside the guest gets pushed 267 + back out to spindle's Nix cache (if one is configured), so the next workflow 268 + that needs it doesn't have to build it again. Any paths that already exist on 269 + any of the configured read caches won't be uploaded. Built paths are queued by 270 + the agent and are immediately uploaded. If any paths are still left when we 271 + reach VM teardown, the workflow will wait until everything is uploaded. 
272 + 273 + ## in the future 274 + 275 + todo 276 + 277 + Feel free to come and ask any questions you might have on https://chat.tangled.sh!
+11
buf.gen.yaml
··· 1 + version: v2 2 + plugins: 3 + - local: protoc-gen-go 4 + out: . 5 + opt: 6 + - paths=import 7 + - module=tangled.org/core 8 + - local: protoc-gen-prost 9 + out: shuttle/src/gen 10 + opt: 11 + - bytes=.
+6
buf.lock
··· 1 + # Generated by buf. DO NOT EDIT. 2 + version: v2 3 + deps: 4 + - name: buf.build/bufbuild/protovalidate 5 + commit: 50325440f8f24053b047484a6bf60b76 6 + digest: b5:74cb6f5c0853c3c10aafc701614194bbd63326bdb8ef4068214454b8894b03ba4113e04b3a33a8321cdf05336e37db4dc14a5e2495db8462566914f36086ba31
+5
buf.yaml
··· 1 + version: v2 2 + modules: 3 + - path: spindle/agentproto 4 + deps: 5 + - buf.build/bufbuild/protovalidate
+26
cmd/spindle-microvm-run/main.go
··· 1 + //go:build linux 2 + 3 + package main 4 + 5 + import ( 6 + "context" 7 + "log/slog" 8 + "os" 9 + 10 + tlog "tangled.org/core/log" 11 + ) 12 + 13 + func main() { 14 + cmd := SpindleMicroVMRunCommand() 15 + 16 + logger := tlog.New("spindle-microvm-run") 17 + slog.SetDefault(logger) 18 + 19 + ctx := context.Background() 20 + ctx = tlog.IntoContext(ctx, logger) 21 + 22 + if err := cmd.Run(ctx, os.Args); err != nil { 23 + logger.Error(err.Error()) 24 + os.Exit(-1) 25 + } 26 + }
+357
cmd/spindle-microvm-run/main_linux.go
··· 1 + //go:build linux 2 + 3 + package main 4 + 5 + import ( 6 + "context" 7 + "database/sql" 8 + "errors" 9 + "fmt" 10 + "log/slog" 11 + "net" 12 + "os" 13 + "time" 14 + 15 + "github.com/mdlayher/vsock" 16 + "github.com/urfave/cli/v3" 17 + agentv1 "tangled.org/core/spindle/agentproto/gen" 18 + "tangled.org/core/spindle/db" 19 + "tangled.org/core/spindle/engines/microvm" 20 + ) 21 + 22 + func SpindleMicroVMRunCommand() *cli.Command { 23 + return &cli.Command{ 24 + Name: "spindle-microvm-run", 25 + Usage: "launch the Spindle base microVM and run one command over vsock", 26 + Flags: []cli.Flag{ 27 + &cli.StringFlag{ 28 + Name: "image-spec", 29 + Sources: cli.EnvVars("SPINDLE_MICROVM_IMAGE_SPEC"), 30 + Usage: "path to microVM image spec JSON", 31 + }, 32 + &cli.StringFlag{ 33 + Name: "mkfs-ext4", 34 + Usage: "override mkfs.ext4 binary", 35 + }, 36 + &cli.StringFlag{ 37 + Name: "work-dir", 38 + Usage: "directory for per-run disks and sockets", 39 + }, 40 + &cli.UintFlag{ 41 + Name: "cid", 42 + Usage: "guest vsock CID; defaults to a random high CID", 43 + }, 44 + &cli.UintFlag{ 45 + Name: "port", 46 + Value: 10240, 47 + Usage: "host vsock port to listen on", 48 + }, 49 + &cli.UintFlag{ 50 + Name: "memory-mib", 51 + Usage: "override the guest memory size in MiB (defaults to the image spec)", 52 + }, 53 + &cli.BoolFlag{ 54 + Name: "disable-kvm", 55 + Usage: "run without -enable-kvm even if /dev/kvm is available", 56 + }, 57 + &cli.BoolFlag{ 58 + Name: "dev", 59 + Usage: "enable dev mode (allows host network access, disables SSL verification)", 60 + }, 61 + &cli.DurationFlag{ 62 + Name: "qmp-timeout", 63 + Value: 10 * time.Second, 64 + Usage: "how long to wait for qmp to become ready", 65 + }, 66 + &cli.DurationFlag{ 67 + Name: "accept-timeout", 68 + Value: 15 * time.Second, 69 + Usage: "how long to wait for the guest agent after qemu starts", 70 + }, 71 + &cli.DurationFlag{ 72 + Name: "exec-timeout", 73 + Value: 30 * time.Second, 74 + Usage: "timeout for the guest 
command", 75 + }, 76 + &cli.DurationFlag{ 77 + Name: "cache-drain-timeout", 78 + Value: 5 * time.Minute, 79 + Usage: "how long to wait for queued cache uploads after the guest command exits", 80 + }, 81 + &cli.DurationFlag{ 82 + Name: "shutdown-timeout", 83 + Value: 10 * time.Second, 84 + Usage: "how long to wait for qemu to exit after guest powerdown", 85 + }, 86 + &cli.StringFlag{ 87 + Name: "cwd", 88 + Usage: "guest working directory", 89 + }, 90 + &cli.StringSliceFlag{ 91 + Name: "cache-read-url", 92 + Sources: cli.EnvVars("SPINDLE_NIX_CACHE_READ_URLS"), 93 + Usage: "Nix binary cache URL to pass to the guest; repeatable", 94 + }, 95 + &cli.StringSliceFlag{ 96 + Name: "cache-trusted-public-key", 97 + Sources: cli.EnvVars("SPINDLE_NIX_CACHE_TRUSTED_PUBLIC_KEYS"), 98 + Usage: "Nix binary cache public key to trust in the guest; repeatable", 99 + }, 100 + &cli.StringFlag{ 101 + Name: "cache-upload-url", 102 + Sources: cli.EnvVars("SPINDLE_NIX_CACHE_UPLOAD_URL"), 103 + Usage: "optional cache upload URL for guest-built store paths", 104 + }, 105 + &cli.StringFlag{ 106 + Name: "activate-config", 107 + Usage: "JSON user config to activate before exec (e.g. '{\"services\":{\"openssh\":{\"enable\":true}}}')", 108 + }, 109 + &cli.StringFlag{ 110 + Name: "db", 111 + Usage: "path to sqlite database for config cache", 112 + }, 113 + }, 114 + Action: runMicroVMRunDev, 115 + } 116 + } 117 + 118 + func runMicroVMRunDev(ctx context.Context, cmd *cli.Command) error { 119 + imageSpecPath := cmd.String("image-spec") 120 + if imageSpecPath == "" { 121 + return fmt.Errorf("--image-spec or SPINDLE_MICROVM_IMAGE_SPEC is required") 122 + } 123 + 124 + imageSpec, err := microvm.LoadImageSpec(imageSpecPath) 125 + if err != nil { 126 + return err 127 + } 128 + 129 + port := uint32(cmd.Uint("port")) 130 + // tell the guest which host vsock port to dial back on. 
shuttle reads the 131 + // cmdline params this is so we can run multiple of this process 132 + // concurrently, because otherwise it listens on a specific vsock port, and 133 + // we cant bind to the same port twice... 134 + imageSpec.BootArgs = fmt.Sprintf("%s shuttle.vsock_port=%d", imageSpec.BootArgs, port) 135 + if mib := cmd.Uint("memory-mib"); mib > 0 { 136 + imageSpec.MemoryMiB = int(mib) 137 + } 138 + ln, err := vsock.Listen(port, nil) 139 + if err != nil { 140 + return fmt.Errorf("listen on vsock port %d: %w", port, err) 141 + } 142 + defer ln.Close() 143 + 144 + vm, err := microvm.StartVM(ctx, microvm.VMConfig{ 145 + Image: imageSpec, 146 + BootTimeout: cmd.Duration("qmp-timeout"), 147 + CID: uint32(cmd.Uint("cid")), 148 + EnableKVM: !cmd.Bool("disable-kvm"), 149 + MkfsExt4: cmd.String("mkfs-ext4"), 150 + WorkDir: cmd.String("work-dir"), 151 + Dev: cmd.Bool("dev"), 152 + }, slog.Default()) 153 + if err != nil { 154 + return err 155 + } 156 + defer vm.Close() 157 + 158 + logs := vm.Logs() 159 + fmt.Fprintf(os.Stderr, "microvm started: cid=%d work-dir=%s serial-log=%s qemu-log=%s\n", 160 + vm.CID(), 161 + vm.WorkDir(), 162 + logs.Serial, 163 + logs.Extra["qemu"], 164 + ) 165 + 166 + logger := slog.Default() 167 + 168 + if cmd.Duration("accept-timeout") > 0 { 169 + if err := ln.SetDeadline(time.Now().Add(cmd.Duration("accept-timeout"))); err != nil { 170 + return fmt.Errorf("set accept deadline: %w", err) 171 + } 172 + } 173 + 174 + argv := cmd.Args().Slice() 175 + if len(argv) == 0 { 176 + argv = []string{"/run/current-system/sw/bin/echo", "hello-from-spindle"} 177 + } 178 + jobID := "spindle-microvm-run" 179 + execID := "dev-1" 180 + 181 + fmt.Fprintf(os.Stderr, "listening for agent on %s\n", ln.Addr()) 182 + conn, err := acceptExpectedVsockConn(ln, vm.CID(), logger) 183 + if err != nil { 184 + return fmt.Errorf("accept agent connection: %w", err) 185 + } 186 + defer conn.Close() 187 + 188 + upstreams, err := 
microvm.BuildCacheUpstreams(cmd.StringSlice("cache-read-url"), nil) 189 + if err != nil { 190 + return fmt.Errorf("build cache upstreams: %w", err) 191 + } 192 + 193 + var readCache *microvm.ReadCacheProxy 194 + if len(cmd.StringSlice("cache-read-url")) > 0 { 195 + var err error 196 + readCache, err = microvm.StartReadCacheProxy(ctx, vm.CID(), upstreams, logger) 197 + if err != nil { 198 + return fmt.Errorf("start read cache proxy: %w", err) 199 + } 200 + defer readCache.Close() 201 + } 202 + 203 + var uploadCache *microvm.UploadCacheProxy 204 + if cmd.String("cache-upload-url") != "" { 205 + var err error 206 + uploadCache, err = microvm.StartUploadCacheProxy(ctx, vm.CID(), cmd.String("cache-upload-url"), upstreams, logger) 207 + if err != nil { 208 + return fmt.Errorf("start upload cache proxy: %w", err) 209 + } 210 + defer uploadCache.Close() 211 + } 212 + dnsProxy, err := microvm.StartDNSProxy(ctx, vm.CID(), logger) 213 + if err != nil { 214 + return fmt.Errorf("start dns proxy: %w", err) 215 + } 216 + defer dnsProxy.Close() 217 + 218 + session := microvm.NewAgentSession(conn, logger) 219 + 220 + initCtx, cancelInit := context.WithTimeout(ctx, 30*time.Second) 221 + defer cancelInit() 222 + if err := session.Init(initCtx, &agentv1.Init{ 223 + JobId: jobID, 224 + CacheTrustedPublicKeys: cmd.StringSlice("cache-trusted-public-key"), 225 + CacheReadProxyPort: readCache.Port(), 226 + CacheUploadProxyPort: uploadCache.Port(), 227 + DnsProxyPort: dnsProxy.Port(), 228 + }); err != nil { 229 + return fmt.Errorf("init agent: %w", err) 230 + } 231 + 232 + execCtx := ctx 233 + if cmd.Duration("exec-timeout") > 0 { 234 + var cancel context.CancelFunc 235 + execCtx, cancel = context.WithTimeout(ctx, cmd.Duration("exec-timeout")) 236 + defer cancel() 237 + } 238 + 239 + if cmd.String("activate-config") != "" { 240 + actCtx := execCtx 241 + baseHash, err := microvm.BaseConfigHash(imageSpec) 242 + if err != nil { 243 + return fmt.Errorf("calculate base config hash: %w", err) 244 
+ } 245 + 246 + var d *db.DB 247 + var configKey string 248 + var cachedToplevel string 249 + if cmd.String("db") != "" { 250 + d, err = db.Make(ctx, cmd.String("db")) 251 + if err != nil { 252 + return fmt.Errorf("failed to open database: %w", err) 253 + } 254 + defer d.Close() 255 + 256 + configKey, err = microvm.BuildConfigKey(imageSpec, cmd.String("activate-config")) 257 + if err != nil { 258 + return fmt.Errorf("calculate config key: %w", err) 259 + } 260 + 261 + record, err := d.GetNixOSToplevelCacheRecord(configKey) 262 + if err != nil { 263 + if !errors.Is(err, sql.ErrNoRows) { 264 + return fmt.Errorf("lookup config cache: %w", err) 265 + } 266 + } else { 267 + cachedToplevel = record.Toplevel 268 + fmt.Printf("realizing cached NixOS config %s\n", cachedToplevel) 269 + } 270 + } 271 + 272 + result, err := session.ActivateConfig(actCtx, "dev-activate", &agentv1.ActivateConfig{ 273 + ConfigKey: configKey, 274 + BaseConfigHash: baseHash, 275 + UserConfig: cmd.String("activate-config"), 276 + Toplevel: cachedToplevel, 277 + }) 278 + if err != nil { 279 + return fmt.Errorf("activate config: %w", err) 280 + } 281 + fmt.Fprintf(os.Stderr, "activated config toplevel: %s\n", result.Toplevel) 282 + 283 + if d != nil && cachedToplevel == "" && result.Toplevel != "" && configKey != "" { 284 + err = d.SaveNixOSToplevelCacheRecord(configKey, result.Toplevel) 285 + if err != nil { 286 + return fmt.Errorf("save config cache: %w", err) 287 + } 288 + } 289 + } 290 + 291 + exitCode, err := session.Exec(execCtx, microvm.AgentExec{ 292 + ID: execID, 293 + ExecStart: &agentv1.ExecStart{ 294 + Argv: argv, 295 + Cwd: cmd.String("cwd"), 296 + }, 297 + Stdout: os.Stdout, 298 + Stderr: os.Stderr, 299 + }) 300 + if err != nil { 301 + return err 302 + } 303 + 304 + if uploadCache != nil { 305 + drainCtx := ctx 306 + if cmd.Duration("cache-drain-timeout") > 0 { 307 + var cancel context.CancelFunc 308 + drainCtx, cancel = context.WithTimeout(ctx, cmd.Duration("cache-drain-timeout")) 309 
+ defer cancel() 310 + } 311 + uploaded, err := session.Drain(drainCtx) 312 + if err != nil { 313 + return err 314 + } 315 + fmt.Printf("cache uploaded: %d\n", uploaded) 316 + } 317 + 318 + // mirror the engine shutdown order: ask the agent to power off first, 319 + // then fall back to qemu powerdown / kill 320 + shutdownCtx, cancel := context.WithTimeout(context.Background(), cmd.Duration("shutdown-timeout")) 321 + defer cancel() 322 + poweredOff := false 323 + if err := session.Poweroff(shutdownCtx); err != nil { 324 + fmt.Fprintf(os.Stderr, "agent poweroff: %s\n", err) 325 + } else if err := vm.WaitContext(shutdownCtx); err == nil { 326 + poweredOff = true 327 + } 328 + if !poweredOff { 329 + if err := vm.Shutdown(shutdownCtx); err != nil { 330 + fmt.Fprintf(os.Stderr, "microvm shutdown fallback: %s\n", err) 331 + } 332 + } 333 + 334 + if exitCode != 0 { 335 + return fmt.Errorf("guest command exited with code %d", exitCode) 336 + } 337 + return nil 338 + } 339 + 340 + func acceptExpectedVsockConn(ln *vsock.Listener, allowedCID uint32, logger *slog.Logger) (net.Conn, error) { 341 + for { 342 + conn, err := ln.Accept() 343 + if err != nil { 344 + return nil, err 345 + } 346 + if allowedCID == 0 { 347 + return conn, nil 348 + } 349 + addr, ok := conn.RemoteAddr().(*vsock.Addr) 350 + if ok && addr.ContextID == allowedCID { 351 + return conn, nil 352 + } 353 + remote := conn.RemoteAddr() 354 + _ = conn.Close() 355 + logger.Warn("dropped agent connection from unexpected cid", "remote", remote, "expected", allowedCID) 356 + } 357 + }
+13
cmd/spindle-microvm-run/unsupported.go
··· 1 + //go:build !linux 2 + 3 + package main 4 + 5 + import ( 6 + "fmt" 7 + "os" 8 + ) 9 + 10 + func main() { 11 + fmt.Fprintf(os.Stderr, "spindle-microvm-run is only supported on Linux\n") 12 + os.Exit(-1) 13 + }
+22 -3
cmd/spindle/main.go
··· 5 5 "log/slog" 6 6 "os" 7 7 8 + "github.com/urfave/cli/v3" 8 9 tlog "tangled.org/core/log" 9 10 "tangled.org/core/spindle" 10 11 ) 11 12 12 13 func main() { 14 + cmd := &cli.Command{ 15 + Name: "spindle", 16 + Usage: "spindle continuous integration runner", 17 + Commands: []*cli.Command{ 18 + Command(), 19 + }, 20 + DefaultCommand: "run", 21 + } 22 + 13 23 logger := tlog.New("spindle") 14 24 slog.SetDefault(logger) 15 25 16 26 ctx := context.Background() 17 27 ctx = tlog.IntoContext(ctx, logger) 18 28 19 - err := spindle.Run(ctx) 20 - if err != nil { 21 - logger.Error("error running spindle", "error", err) 29 + if err := cmd.Run(ctx, os.Args); err != nil { 30 + logger.Error(err.Error()) 22 31 os.Exit(-1) 23 32 } 24 33 } 34 + 35 + func Command() *cli.Command { 36 + return &cli.Command{ 37 + Name: "run", 38 + Usage: "run the spindle server", 39 + Action: func(ctx context.Context, cmd *cli.Command) error { 40 + return spindle.Run(ctx) 41 + }, 42 + } 43 + }
+61 -6
docker-compose.yml
··· 50 50 PDS_URL: http://pds:3000 51 51 OWNER_USER: alice 52 52 KNOT_HOSTNAME: knot.tngl.boltless.dev 53 + SPINDLE_HOSTNAME: spindle.tngl.boltless.dev 53 54 volumes: 54 55 - ./localinfra/scripts/init-accounts.sh:/init.sh:ro 55 56 - init-state:/shared ··· 85 86 JETSTREAM_WS_URL: wss://pds.tngl.boltless.dev/xrpc/com.atproto.sync.subscribeRepos 86 87 volumes: 87 88 - jetstream-data:/data 88 - - ./localinfra/certs/root.crt:/etc/ssl/certs/caddy.crt:ro 89 + - ./localinfra/certs/root.crt:/etc/ssl/certs/ca-certificates.crt:ro 89 90 depends_on: 90 91 pds: 91 92 condition: service_healthy ··· 113 114 - knot-data:/home/git 114 115 - knot-ssh-keys:/etc/ssh/keys 115 116 - init-state:/shared:ro 116 - - ./localinfra/certs/root.crt:/etc/ssl/certs/caddy.crt:ro 117 + - ./localinfra/certs/root.crt:/usr/local/share/ca-certificates/caddy.crt:ro 117 118 healthcheck: 118 119 test: ["CMD", "wget", "-qO-", "http://localhost:5555/"] 119 120 interval: 2s ··· 131 132 condition: service_completed_successfully 132 133 networks: [tngl] 133 134 135 + spindle: 136 + build: 137 + context: . 
138 + dockerfile: localinfra/spindle.Dockerfile 139 + restart: unless-stopped 140 + environment: 141 + SPINDLE_SERVER_HOSTNAME: spindle.tngl.boltless.dev 142 + SPINDLE_SERVER_LISTEN_ADDR: 0.0.0.0:6555 143 + SPINDLE_SERVER_DB_PATH: /var/lib/spindle/spindle.db 144 + SPINDLE_SERVER_PLC_URL: https://plc.tngl.boltless.dev 145 + SPINDLE_SERVER_JETSTREAM_ENDPOINT: wss://jetstream.tngl.boltless.dev/subscribe 146 + SPINDLE_SERVER_DEV: "true" 147 + SPINDLE_SERVER_DEV_EXTRA_HOSTS: knot.tngl.boltless.dev,mirror.tngl.boltless.dev 148 + SPINDLE_SERVER_TAP_DB_PATH: /var/lib/spindle/tap.db 149 + SPINDLE_SERVER_TAP_RELAY_URL: https://pds.tngl.boltless.dev 150 + SPINDLE_MICROVM_PIPELINES_IMAGE_DIR: /var/lib/spindle/images 151 + SPINDLE_MICROVM_PIPELINES_OVERLAY_DIR: /var/lib/spindle/overlays 152 + SPINDLE_MICROVM_PIPELINES_AGENT_PORT: "11240" 153 + SPINDLE_S3_LOG_BUCKET: "" 154 + devices: 155 + - /dev/vsock:/dev/vsock 156 + - /dev/kvm:/dev/kvm 157 + - /dev/vhost-vsock:/dev/vhost-vsock 158 + - /dev/net/tun:/dev/net/tun 159 + cap_add: 160 + - NET_ADMIN 161 + - SYS_ADMIN 162 + security_opt: 163 + - label=disable 164 + - seccomp=unconfined 165 + volumes: 166 + - spindle-data:/var/lib/spindle 167 + - spindle-logs:/var/log/spindle 168 + - ./out/localinfra-spindle-images:/var/lib/spindle/images:ro 169 + - init-state:/shared:ro 170 + - ./localinfra/certs/root.crt:/usr/local/share/ca-certificates/caddy.crt:ro 171 + healthcheck: 172 + test: ["CMD", "wget", "-qO-", "http://localhost:6555/"] 173 + interval: 2s 174 + timeout: 2s 175 + retries: 30 176 + start_period: 5s 177 + depends_on: 178 + plc: 179 + condition: service_started 180 + jetstream: 181 + condition: service_started 182 + init-accounts: 183 + condition: service_completed_successfully 184 + networks: [tngl] 185 + 134 186 knotmirror-tap: 135 187 image: ghcr.io/bluesky-social/indigo/tap:sha-4f47add43060c27e8a37d9d76482ecddf001fcd8 # 0.1.10 136 188 restart: unless-stopped ··· 144 196 TAP_RESYNC_PARALLELISM: "10" 145 197 
TAP_RETRY_TIMEOUT: 60s 146 198 volumes: 147 - - ./localinfra/certs/root.crt:/etc/ssl/certs/caddy.crt:ro 199 + - ./localinfra/certs/root.crt:/etc/ssl/certs/ca-certificates.crt:ro 148 200 depends_on: 149 201 postgres: 150 202 condition: service_started ··· 171 223 MIRROR_RESYNC_PARALLELISM: "4" 172 224 volumes: 173 225 - knotmirror-data:/data 174 - - ./localinfra/certs/root.crt:/etc/ssl/certs/caddy.crt:ro 226 + - ./localinfra/certs/root.crt:/usr/local/share/ca-certificates/caddy.crt:ro 175 227 healthcheck: 176 228 test: ["CMD", "wget", "-qO-", "http://localhost:7000/"] 177 229 interval: 2s ··· 202 254 - ./blog/templates:/build/blog/templates:ro 203 255 - ./blog/posts:/build/blog/posts:ro 204 256 - ./appview/pages/static:/build/appview/pages/static 205 - command: ["-i", "input.css", "-o", "appview/pages/static/tw.css", "--watch=always"] 257 + command: 258 + ["-i", "input.css", "-o", "appview/pages/static/tw.css", "--watch=always"] 206 259 network_mode: none 207 260 208 261 appview: ··· 226 279 - go-mod-cache:/go/mod 227 280 - appview-data:/var/lib/appview 228 281 - init-state:/shared:ro 229 - - ./localinfra/certs/root.crt:/etc/ssl/certs/caddy.crt:ro 282 + - ./localinfra/certs/root.crt:/usr/local/share/ca-certificates/caddy.crt:ro 230 283 depends_on: 231 284 redis: 232 285 condition: service_started ··· 267 320 knot-data: 268 321 knot-ssh-keys: 269 322 knotmirror-data: 323 + spindle-data: 324 + spindle-logs: 270 325 init-state: 271 326 go-cache: 272 327 go-mod-cache:
+297 -51
docs/DOCS.md
··· 760 760 directory at the root of your repository, and are defined 761 761 using YAML. 762 762 763 - The fields are: 763 + A workflow has a set of common fields that apply no matter 764 + which engine you pick: 764 765 765 766 - [Trigger](#trigger): A **required** field that defines 766 767 when a workflow should be triggered. ··· 768 769 engine a workflow should run on. 769 770 - [Clone options](#clone-options): An **optional** field 770 771 that defines how the repository should be cloned. 771 - - [Dependencies](#dependencies): An **optional** field that 772 - allows you to list dependencies you may need. 773 772 - [Environment](#environment): An **optional** field that 774 773 allows you to define environment variables. 775 774 - [Steps](#steps): An **optional** field that allows you to 776 775 define what steps should run in the workflow. 776 + 777 + On top of these, each engine has its own options for things 778 + like dependencies and images. See [Engines](#engines) for 779 + the per-engine fields. 777 780 778 781 ### Trigger 779 782 ··· 853 856 search for packages on https://search.nixos.org, and 854 857 there's a pretty good chance the package(s) you're looking 855 858 for will be there. 859 + See [Nixery engine](#nixery-engine). 860 + - `microvm`: Runs the whole workflow inside its own 861 + microVM. Has configuration features for NixOS images 862 + that will let you enable services, do Docker-in-VM, etc. 863 + See [microVM engine](#microvm-engine). 856 864 857 865 Example: 858 866 859 867 ```yaml 860 868 engine: "nixery" 861 869 ``` 870 + 871 + Each engine also adds its own workflow fields (dependencies, 872 + images, services, and so on). These are documented under 873 + [Engines](#engines). 862 874 863 875 ### Clone options 864 876 ··· 891 903 submodules: false 892 904 ``` 893 905 894 - ### Dependencies 895 - 896 - Usually when you're running a workflow, you'll need 897 - additional dependencies. 
The `dependencies` field lets you 898 - define which dependencies to get, and from where. It's a 899 - key-value map, with the key being the registry to fetch 900 - dependencies from, and the value being the list of 901 - dependencies to fetch. 902 - 903 - The registry URL syntax can be found [on the nix 904 - manual](https://nix.dev/manual/nix/2.18/command-ref/new-cli/nix3-registry-add). 905 - 906 - Say you want to fetch Node.js and Go from `nixpkgs`, and a 907 - package called `my_pkg` you've made from your own registry 908 - at your repository at 909 - `https://tangled.org/@example.com/my_pkg`. You can define 910 - those dependencies like so: 911 - 912 - ```yaml 913 - dependencies: 914 - # nixpkgs 915 - nixpkgs: 916 - - nodejs 917 - - go 918 - # unstable 919 - nixpkgs/nixpkgs-unstable: 920 - - bun 921 - # custom registry 922 - git+https://tangled.org/@example.com/my_pkg: 923 - - my_pkg 924 - ``` 925 - 926 - Now these dependencies are available to use in your 927 - workflow! 928 - 929 906 ### Environment 930 907 931 908 The `environment` field allows you define environment ··· 992 969 - `command`: This field allows you to define a command to 993 970 run in that step. The step is run in a Bash shell, and the 994 971 logs from the command will be visible in the pipelines 995 - page on the Tangled website. The 996 - [dependencies](#dependencies) you added will be available 997 - to use here. 972 + page on the Tangled website. Any dependencies you added in 973 + your engine's section (see [Engines](#engines)) will be 974 + available to use here. 998 975 - `environment`: Similar to the global 999 976 [environment](#environment) config, this **optional** 1000 977 field is a key-value map that allows you to set ··· 1018 995 NODE_ENV: "production" 1019 996 ``` 1020 997 1021 - ### Complete workflow 998 + ## Engines 999 + 1000 + The common fields above apply to every workflow. Each engine 1001 + then adds its own fields on top. 
Pick an engine with the 1002 + [`engine`](#engine) field and use the matching section below. 1003 + 1004 + ### Nixery engine 1005 + 1006 + #### Dependencies 1007 + 1008 + When you're running a workflow you'll usually need additional 1009 + dependencies. The `dependencies` field lets you define which 1010 + dependencies to get, and from where. It's a key-value map, 1011 + with the key being the registry to fetch dependencies from, 1012 + and the value being the list of dependencies to fetch. 1013 + 1014 + The registry URL syntax can be found [on the nix 1015 + manual](https://nix.dev/manual/nix/2.18/command-ref/new-cli/nix3-registry-add). 1016 + 1017 + Say you want to fetch Node.js and Go from `nixpkgs`, and a 1018 + package called `my_pkg` you've made from your own registry 1019 + at your repository at 1020 + `https://tangled.org/@example.com/my_pkg`. You can define 1021 + those dependencies like so: 1022 + 1023 + ```yaml 1024 + dependencies: 1025 + # nixpkgs 1026 + nixpkgs: 1027 + - nodejs 1028 + - go 1029 + # unstable 1030 + nixpkgs/nixpkgs-unstable: 1031 + - bun 1032 + # custom registry 1033 + git+https://tangled.org/@example.com/my_pkg: 1034 + - my_pkg 1035 + ``` 1036 + 1037 + Now these dependencies are available to use in your 1038 + workflow! 1039 + 1040 + #### Complete nixery workflow 1022 1041 1023 1042 ```yaml 1024 1043 # .tangled/workflows/build.yml ··· 1068 1087 the one [Tangled uses to build the 1069 1088 project](https://tangled.org/@tangled.org/core/blob/master/.tangled/workflows/build.yml). 1070 1089 1090 + ### microVM engine 1091 + 1092 + #### Image 1093 + 1094 + A workflow picks the image to boot with the top-level `image` 1095 + field: 1096 + 1097 + ```yaml 1098 + engine: microvm 1099 + image: nixos 1100 + ``` 1101 + 1102 + There are two flavours of images: 1103 + 1104 + - **NixOS images** (e.g. `nixos`): the whole guest is built 1105 + with Nix, so you can configure it from the workflow file 1106 + itself. 
The `dependencies`, `services`, `virtualisation`, 1107 + `registry` and `caches` fields below are all understood 1108 + here, and the guest builds and activates that configuration 1109 + before any of your steps run. 1110 + - **Non-NixOS images** (e.g. `alpine`): there's no NixOS to 1111 + configure, so the workflow-level config fields above have 1112 + no effect. You still get a full machine to run steps in. 1113 + 1114 + The available image names depend on what the spindle operator 1115 + has installed. `nixos` and `alpine` are examples. If `image` 1116 + is omitted, the spindle's configured default image is used. 1117 + 1118 + #### Dependencies 1119 + 1120 + On the microVM engine, `dependencies` is a flat list of 1121 + packages that get added to the guest's `PATH` (via 1122 + `environment.systemPackages`). This field only applies to 1123 + **NixOS images**, for other images you can use the package 1124 + manager included in a step. 1125 + 1126 + A bare name like `go` is looked up in nixpkgs. You can also 1127 + point at any flake with the `flakeref#attr` syntax, so 1128 + `github:nixos/nixpkgs#hello` pulls `hello` straight out of 1129 + that flake. 1130 + 1131 + ```yaml 1132 + dependencies: 1133 + - go 1134 + - github:nixos/nixpkgs#hello 1135 + ``` 1136 + 1137 + #### Registry 1138 + 1139 + The `registry` field remaps flake references, the same way 1140 + `nix registry` does. This lets you pin or alias the flakes 1141 + used by `dependencies`. 1142 + 1143 + For example, pin `nixpkgs` to `nixos-unstable` so that the 1144 + bare `go` above resolves from unstable, and alias your own 1145 + flake so you can use `myflake#tool` in `dependencies`: 1146 + 1147 + ```yaml 1148 + registry: 1149 + nixpkgs: github:nixos/nixpkgs/nixos-unstable 1150 + myflake: github:me/x 1151 + ``` 1152 + 1153 + #### Caches 1154 + 1155 + The `caches` field is a map of Nix binary cache URL to its 1156 + trusted public key. 
These are fed into the spindle's read 1157 + proxy, so the guest can substitute prebuilt paths from them 1158 + instead of building everything from scratch. 1159 + 1160 + ```yaml 1161 + caches: 1162 + https://nix-community.cachix.org: "nix-community.cachix.org-1:mB9FSh9qf2dCimDSUo8Zy7bkq5CX+/rkCWyvRCYg3Fs=" 1163 + ``` 1164 + 1165 + #### Services and virtualisation 1166 + 1167 + The `services` and `virtualisation` fields are passed straight 1168 + through to NixOS. Anything you could write under 1169 + `services.*` or `virtualisation.*` in a NixOS configuration, 1170 + you can write here, and it's brought up before any of your 1171 + steps run. 1172 + 1173 + As a convenience, `true` works as shorthand for 1174 + `.enable = true` anywhere an `enable` option exists (e.g. 1175 + `virtualisation.docker: true`). 1176 + 1177 + ```yaml 1178 + services: 1179 + postgresql: 1180 + enable: true 1181 + ensureDatabases: ["spindle-workflow"] 1182 + ensureUsers: 1183 + - name: spindle-workflow 1184 + ensureDBOwnership: true 1185 + 1186 + virtualisation: 1187 + docker: true 1188 + ``` 1189 + 1071 1190 ## Self-hosting guide 1072 1191 1073 1192 ### Prerequisites 1074 1193 1075 1194 - Go 1076 - - Docker (the only supported backend currently) 1195 + - For the **nixery** engine: Docker (or Podman with Docker 1196 + compatibility enabled). 1197 + - For the **microVM** engine: a Linux host with KVM, plus the 1198 + microVM host dependencies described in [Running microVM 1199 + workflows](#running-microvm-workflows). 1077 1200 1078 1201 ### Configuration 1079 1202 ··· 1090 1213 - `SPINDLE_PIPELINES_NIXERY`: The Nixery URL (default: `"nixery.tangled.sh"`). 1091 1214 - `SPINDLE_PIPELINES_WORKFLOW_TIMEOUT`: The default workflow timeout (default: `"5m"`). 
1092 1215 1216 + For the microVM engine, the following are also available 1217 + (prefix `SPINDLE_MICROVM_PIPELINES_`): 1218 + 1219 + - `SPINDLE_MICROVM_PIPELINES_IMAGE_DIR`: Directory containing 1220 + microVM images (**required** to use the engine). See 1221 + [Running microVM workflows](#running-microvm-workflows). 1222 + - `SPINDLE_MICROVM_PIPELINES_DEFAULT_IMAGE`: Image used when a 1223 + workflow doesn't set `image` (default: `"nixos-x86_64"`). 1224 + - `SPINDLE_MICROVM_PIPELINES_OVERLAY_DIR`: Where per-workflow 1225 + temporary disks are created (default: the system temp dir). 1226 + - `SPINDLE_MICROVM_PIPELINES_ENABLE_KVM`: Use KVM hardware 1227 + acceleration (default: `true`). Without KVM, guests fall 1228 + back to slow software emulation. 1229 + - `SPINDLE_MICROVM_PIPELINES_WORKFLOW_TIMEOUT`: Default 1230 + workflow timeout (default: `"5m"`). 1231 + 1232 + Optional resource limits (a value of `0` disables that 1233 + limit). The limits cap usage across all running microVM 1234 + workflows: 1235 + 1236 + - `SPINDLE_MICROVM_PIPELINES_MAX_TOTAL_MEMORY_MIB` 1237 + - `SPINDLE_MICROVM_PIPELINES_MAX_TOTAL_VCPUS` 1238 + - `SPINDLE_MICROVM_PIPELINES_MAX_TOTAL_DISK_MIB` 1239 + 1240 + Optional cgroup enforcement: 1241 + 1242 + - `SPINDLE_MICROVM_PIPELINES_ENABLE_CGROUPS`: Place each 1243 + workflow's QEMU and slirp4netns in a per-workflow cgroup= 1244 + (default: `false`). 1245 + - `SPINDLE_MICROVM_PIPELINES_CGROUP_PARENT`: Parent cgroup; 1246 + `self` resolves the spindle service's own cgroup (default: 1247 + `"self"`). 1248 + - `SPINDLE_MICROVM_PIPELINES_CGROUP_PIDS_MAX`: Max processes 1249 + per workflow cgroup (default: `4096`). 1250 + - `SPINDLE_MICROVM_PIPELINES_CGROUP_SWAP_MAX_MIB`: Max swap 1251 + per workflow cgroup (default: `0`, no swap). 1252 + - `SPINDLE_MICROVM_PIPELINES_CGROUP_SUPERVISOR_MEMORY_MIN_MIB`: 1253 + Memory protected for spindle itself so it isn't OOM-killed 1254 + before the workflows (default: `512`). 
1255 + 1256 + To push paths built inside microVMs back to a shared Nix 1257 + cache (and read from it), configure the cache (prefix 1258 + `SPINDLE_NIX_CACHE_`): 1259 + 1260 + - `SPINDLE_NIX_CACHE_READ_URLS`: Comma-separated binary cache 1261 + URLs the guest reads from. 1262 + - `SPINDLE_NIX_CACHE_TRUSTED_PUBLIC_KEYS`: Comma-separated 1263 + trusted public keys for those caches. 1264 + - `SPINDLE_NIX_CACHE_UPLOAD_URL`: Cache URL that paths built 1265 + in the guest are uploaded to. 1266 + 1093 1267 ### Running spindle 1094 1268 1095 1269 1. **Set the environment variables.** For example: ··· 1122 1296 1123 1297 Spindle will now start, connect to the Jetstream server, and begin processing pipelines. 1124 1298 1299 + ### Running microVM workflows 1300 + 1301 + The microVM engine needs a few extra things on the host, and 1302 + it needs images to boot. 1303 + 1304 + #### Host dependencies 1305 + 1306 + microVM workflows depend on a handful of host tools and 1307 + devices. spindle checks for the ones an image needs right 1308 + before it launches, so a missing dependency surfaces as a 1309 + clear error. You'll need: 1310 + 1311 + - `qemu`: the runner. The QEMU binary for the image's arch 1312 + must be present (e.g. `qemu-system-x86_64`). 1313 + - `mkfs.ext4` (from `e2fsprogs`): to format the per-workflow 1314 + writable volumes. 1315 + - [`slirp4netns`](https://github.com/rootless-containers/slirp4netns#install), 1316 + `ip` (from `iproute2`), `mount` and `unshare` (from `util-linux`): 1317 + used to sandbox guest networking. 1318 + - `/dev/kvm`: for hardware acceleration (unless you disable 1319 + KVM with `SPINDLE_MICROVM_PIPELINES_ENABLE_KVM=false`). 1320 + - `/dev/vhost-vsock`: the guest agent talks to spindle over 1321 + vsock. 
1322 + 1323 + On NixOS, the [spindle 1324 + module](https://tangled.org/tangled.org/core/blob/master/nix/modules/spindle.nix) 1325 + puts `qemu`, `e2fsprogs`, `slirp4netns`, `iproute2` and 1326 + `util-linux` on the service's `PATH` for you. 1327 + 1328 + #### Building images 1329 + 1330 + Images are built with Nix. The flake exposes packages for the 1331 + two stock images (use the `-tarball` prefixed ones for a gzipped 1332 + tarball you can copy to another host): 1333 + 1334 + ```shell 1335 + # a NixOS image 1336 + nix build .#spindle-nixos-image 1337 + # an Alpine image 1338 + nix build .#spindle-alpine-image 1339 + ``` 1340 + 1341 + #### Installing images 1342 + 1343 + Spindle looks for images in 1344 + `SPINDLE_MICROVM_PIPELINES_IMAGE_DIR`. An image is resolved by 1345 + the name a workflow puts in its `image` field, matched 1346 + literally against what's on disk: 1347 + 1348 + 1. a directory `<name>/` containing a `spec.json` (next to the 1349 + kernel/initrd/store-disk), or 1350 + 2. a flat `<name>.json` self-contained spec. 1351 + 1352 + Resolution depends only on the name and what's on disk, never 1353 + on the host doing the resolving, so the same workflow resolves 1354 + to the same image on every spindle. If you keep multiple 1355 + arches side by side, you can name them `<name>-<arch>` (e.g. 1356 + `nixos-x86_64`, `alpine-aarch64`); the suffix is just part of 1357 + the name. To make a name like `nixos` work if you are hosting 1358 + multiple arches, you can use symlinks. 1359 + 1360 + On NixOS, you'll most likely want to use `systemd.tmpfiles.rules` 1361 + to set these up declaratively. 1362 + 1125 1363 ## Architecture 1126 1364 1127 1365 Spindle is a small CI runner service. 
Here's a high-level overview of how it operates: ··· 1135 1373 - The spindle engine then handles execution of the pipeline, with results and 1136 1374 logs beamed on the spindle event stream over WebSocket 1137 1375 1138 - ### The engine 1376 + ### The engines 1139 1377 1140 - At present, the only supported backend is Docker (and Podman, if Docker 1141 - compatibility is enabled, so that `/run/docker.sock` is created). spindle 1142 - executes each step in the pipeline in a fresh container, with state persisted 1143 - across steps within the `/tangled/workspace` directory. 1378 + Spindle has two execution backends, picked per-workflow with 1379 + the [`engine`](#engine) field: 1144 1380 1145 - The base image for the container is constructed on the fly using 1146 - [Nixery](https://nixery.dev), which is handy for caching layers for frequently 1147 - used packages. 1381 + - **nixery**: executes each step in a fresh Docker container 1382 + (Podman works too, if Docker compatibility is enabled so 1383 + that `/run/docker.sock` is created), with state persisted 1384 + across steps within the `/tangled/workspace` directory. The 1385 + base image for the container is constructed on the fly using 1386 + [Nixery](https://nixery.dev), which is handy for caching 1387 + layers for frequently used packages. 1388 + - **microvm**: runs the whole workflow inside its own 1389 + microVM, supporting different images, with extra 1390 + configuration for NixOS images (e.g. services in workflow file). 1391 + See the [engine 1392 + README](https://tangled.org/tangled.org/core/blob/master/spindle/engines/microvm/README.md) 1393 + for the architecture in depth. 1148 1394 1149 1395 The pipeline manifest is [specified here](https://docs.tangled.org/spindles.html#pipelines). 1150 1396
+1 -6
eventconsumer/consumer.go
··· 5 5 "encoding/json" 6 6 "log/slog" 7 7 "net/http" 8 - "net/url" 9 8 "sync" 10 9 "time" 11 10 ··· 29 28 QueueSize int 30 29 Logger *slog.Logger 31 30 CursorStore cursor.Store 32 - URLFunc func(Source, int64) (*url.URL, error) 33 31 34 32 Dialer *websocket.Dialer 35 33 RequestHeader http.Header ··· 92 90 } 93 91 if cfg.CursorStore == nil { 94 92 cfg.CursorStore = &cursor.MemoryStore{} 95 - } 96 - if cfg.URLFunc == nil { 97 - cfg.URLFunc = DefaultURL(false) 98 93 } 99 94 dialer := cfg.Dialer 100 95 if dialer == nil { ··· 263 258 func (c *Consumer) runConnection(ctx context.Context, source Source) error { 264 259 cursor := c.cfg.CursorStore.Get(source.Key()) 265 260 266 - u, err := c.cfg.URLFunc(source, cursor) 261 + u, err := source.URL(cursor) 267 262 if err != nil { 268 263 return err 269 264 }
+1 -5
eventconsumer/consumer_test.go
··· 69 69 srv := httptest.NewServer(mux) 70 70 t.Cleanup(srv.Close) 71 71 addr := strings.TrimPrefix(srv.URL, "http://") 72 - return Source{Kind: "test", Host: addr}, &n 72 + return Source{Kind: "test", Host: addr, NoTLS: true}, &n 73 73 } 74 74 75 75 func TestConsumer_DrainAdvancesCursor(t *testing.T) { ··· 95 95 QueueSize: 16, 96 96 ConnectionTimeout: 2 * time.Second, 97 97 CursorStore: store, 98 - URLFunc: DefaultURL(true), 99 98 Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), 100 99 } 101 100 c := NewConsumer(cfg) ··· 158 157 QueueSize: 16, 159 158 ConnectionTimeout: 2 * time.Second, 160 159 CursorStore: store, 161 - URLFunc: DefaultURL(true), 162 160 Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), 163 161 } 164 162 c := NewConsumer(cfg) ··· 203 201 QueueSize: 8, 204 202 ConnectionTimeout: 2 * time.Second, 205 203 CursorStore: &cursor.MemoryStore{}, 206 - URLFunc: DefaultURL(true), 207 204 Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), 208 205 } 209 206 c := NewConsumer(cfg) ··· 249 246 QueueSize: 16, 250 247 ConnectionTimeout: 2 * time.Second, 251 248 CursorStore: store, 252 - URLFunc: DefaultURL(true), 253 249 Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), 254 250 } 255 251 c := NewConsumer(cfg)
+23 -17
eventconsumer/source.go
··· 5 5 "strconv" 6 6 7 7 "tangled.org/core/eventconsumer/cursor" 8 + "tangled.org/core/hostutil" 8 9 ) 9 10 10 11 type Kind string ··· 15 16 ) 16 17 17 18 type Source struct { 18 - Kind Kind 19 - Host string 19 + Kind Kind 20 + Host string 21 + NoTLS bool // use TLS by default 20 22 } 21 23 22 - func NewKnotSource(host string) Source { return Source{Kind: KindKnot, Host: host} } 23 - func NewSpindleSource(host string) Source { return Source{Kind: KindSpindle, Host: host} } 24 + func NewKnotSource(host string) Source { 25 + host, noTLS, _ := hostutil.ParseHostname(host) 26 + return Source{Kind: KindKnot, Host: host, NoTLS: noTLS} 27 + } 28 + func NewSpindleSource(host string) Source { 29 + host, noTLS, _ := hostutil.ParseHostname(host) 30 + return Source{Kind: KindSpindle, Host: host, NoTLS: noTLS} 31 + } 24 32 25 33 func (s Source) Key() string { return string(s.Kind) + ":" + s.Host } 26 34 ··· 33 41 } 34 42 } 35 43 36 - func DefaultURL(dev bool) func(Source, int64) (*url.URL, error) { 44 + func (s Source) URL(cursor int64) (*url.URL, error) { 37 45 scheme := "wss" 38 - if dev { 46 + if s.NoTLS { 39 47 scheme = "ws" 40 48 } 41 - return func(s Source, cursor int64) (*url.URL, error) { 42 - u, err := url.Parse(scheme + "://" + s.Host + "/events") 43 - if err != nil { 44 - return nil, err 45 - } 46 - if cursor != 0 { 47 - q := url.Values{} 48 - q.Add("cursor", strconv.FormatInt(cursor, 10)) 49 - u.RawQuery = q.Encode() 50 - } 51 - return u, nil 49 + u, err := url.Parse(scheme + "://" + s.Host + "/events") 50 + if err != nil { 51 + return nil, err 52 52 } 53 + if cursor != 0 { 54 + q := url.Values{} 55 + q.Add("cursor", strconv.FormatInt(cursor, 10)) 56 + u.RawQuery = q.Encode() 57 + } 58 + return u, nil 53 59 }
-1
eventconsumer/upgrade_test.go
··· 39 39 QueueSize: 16, 40 40 ConnectionTimeout: 2 * time.Second, 41 41 CursorStore: store, 42 - URLFunc: DefaultURL(true), 43 42 Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), 44 43 }) 45 44
+41 -3
flake.lock
··· 117 117 "ibm-plex-mono-src": { 118 118 "flake": false, 119 119 "locked": { 120 - "lastModified": 1731402384, 120 + "lastModified": 1731402378, 121 121 "narHash": "sha256-OwUmrPfEehLDz0fl2ChYLK8FQM2p0G1+EMrGsYEq+6g=", 122 122 "type": "tarball", 123 - "url": "https://github.com/IBM/plex/releases/download/@ibm%2Fplex-mono@1.1.0/ibm-plex-mono.zip" 123 + "url": "https://github.com/IBM/plex/releases/download/@ibm/plex-mono@1.1.0/ibm-plex-mono.zip" 124 124 }, 125 125 "original": { 126 126 "type": "tarball", 127 - "url": "https://github.com/IBM/plex/releases/download/@ibm%2Fplex-mono@1.1.0/ibm-plex-mono.zip" 127 + "url": "https://github.com/IBM/plex/releases/download/@ibm/plex-mono@1.1.0/ibm-plex-mono.zip" 128 128 } 129 129 }, 130 130 "indigo": { ··· 181 181 "url": "https://cdn.jsdelivr.net/npm/mermaid@11.12.3/dist/mermaid.min.js" 182 182 } 183 183 }, 184 + "microvm": { 185 + "inputs": { 186 + "nixpkgs": [ 187 + "nixpkgs" 188 + ], 189 + "spectrum": "spectrum" 190 + }, 191 + "locked": { 192 + "lastModified": 1779970379, 193 + "narHash": "sha256-ZHsxoYXXnfJtMVh1/yY+1Eh9hHcPBhE28Qvinauh+BQ=", 194 + "owner": "microvm-nix", 195 + "repo": "microvm.nix", 196 + "rev": "0d49083ba2d7419b22908ac392777c16df9a032e", 197 + "type": "github" 198 + }, 199 + "original": { 200 + "owner": "microvm-nix", 201 + "repo": "microvm.nix", 202 + "type": "github" 203 + } 204 + }, 184 205 "nixpkgs": { 185 206 "locked": { 186 207 "lastModified": 1771848320, ··· 210 231 "inter-fonts-src": "inter-fonts-src", 211 232 "lucide-src": "lucide-src", 212 233 "mermaid-src": "mermaid-src", 234 + "microvm": "microvm", 213 235 "nixpkgs": "nixpkgs", 214 236 "sqlite-lib-src": "sqlite-lib-src" 215 237 } ··· 229 251 "ref": "nightly", 230 252 "repo": "rust-analyzer", 231 253 "type": "github" 254 + } 255 + }, 256 + "spectrum": { 257 + "flake": false, 258 + "locked": { 259 + "lastModified": 1778940603, 260 + "narHash": "sha256-voSM8dZNlaOWN3kbYFky+FNY6fFQOEw0xF+ZMpZKkCQ=", 261 + "ref": "refs/heads/main", 262 + "rev": 
"367dd227f539267eae2b62770b4c17b88ac8c1f1", 263 + "revCount": 1265, 264 + "type": "git", 265 + "url": "https://spectrum-os.org/git/spectrum" 266 + }, 267 + "original": { 268 + "type": "git", 269 + "url": "https://spectrum-os.org/git/spectrum" 232 270 } 233 271 }, 234 272 "sqlite-lib-src": {
+138 -2
flake.nix
··· 3 3 4 4 inputs = { 5 5 nixpkgs.url = "github:nixos/nixpkgs/nixos-unstable"; 6 + microvm = { 7 + url = "github:microvm-nix/microvm.nix"; 8 + inputs.nixpkgs.follows = "nixpkgs"; 9 + }; 6 10 fenix = { 7 11 url = "github:nix-community/fenix"; 8 12 inputs.nixpkgs.follows = "nixpkgs"; ··· 69 73 ibm-plex-mono-src, 70 74 actor-typeahead-src, 71 75 mermaid-src, 76 + microvm, 72 77 ... 73 78 }: let 74 79 supportedSystems = ["x86_64-linux" "x86_64-darwin" "aarch64-linux" "aarch64-darwin"]; ··· 83 88 fs.toSource { 84 89 root = ./.; 85 90 fileset = fs.difference (fs.intersection (fs.gitTracked ./.) (fs.fileFilter (file: !(file.hasExt "nix")) ./.)) (fs.maybeMissing ./.jj); 91 + }; 92 + rustSrc = let 93 + fs = pkgs.lib.fileset; 94 + in 95 + fs.toSource { 96 + root = ./.; 97 + fileset = 98 + fs.intersection 99 + (fs.fromSource self.src) 100 + (fs.unions [ 101 + ./Cargo.toml 102 + ./Cargo.lock 103 + ./shuttle 104 + ./bobbin 105 + ]); 86 106 }; 87 107 buildGoApplication = 88 108 (self.callPackage "${gomod2nix}/builder" { 89 109 gomod2nix = gomod2nix.legacyPackages.${pkgs.stdenv.hostPlatform.system}.gomod2nix; 90 110 }).buildGoApplication; 91 111 rustPlatform = pkgs.makeRustPlatform { 92 - inherit (fenix.packages.${pkgs.system}.stable) rustc cargo; 112 + inherit (fenix.packages.${pkgs.stdenv.hostPlatform.system}.stable) rustc cargo; 93 113 }; 114 + rustPlatformStatic = let 115 + system = pkgs.stdenv.hostPlatform.system; 116 + muslTarget = pkgs.pkgsStatic.stdenv.hostPlatform.rust.rustcTarget; 117 + toolchain = fenix.packages.${system}.combine [ 118 + fenix.packages.${system}.stable.cargo 119 + fenix.packages.${system}.stable.rustc 120 + fenix.packages.${system}.targets.${muslTarget}.stable.rust-std 121 + ]; 122 + in 123 + pkgs.pkgsStatic.makeRustPlatform { 124 + cargo = toolchain; 125 + rustc = toolchain; 126 + }; 94 127 modules = ./nix/gomod2nix.toml; 95 128 sqlite-lib = self.callPackage ./nix/pkgs/sqlite-lib.nix { 96 129 inherit sqlite-lib-src; ··· 107 140 inherit (pkgs) 
pagefind; 108 141 }; 109 142 spindle = self.callPackage ./nix/pkgs/spindle.nix {}; 143 + shuttle = self.callPackage ./nix/pkgs/shuttle.nix { 144 + src = self.rustSrc; 145 + }; 146 + shuttle-static = self.callPackage ./nix/pkgs/shuttle.nix { 147 + src = self.rustSrc; 148 + rustPlatform = self.rustPlatformStatic; 149 + }; 110 150 knot-unwrapped = self.callPackage ./nix/pkgs/knot-unwrapped.nix {}; 111 151 knot = self.callPackage ./nix/pkgs/knot.nix {}; 112 152 dolly = self.callPackage ./nix/pkgs/dolly.nix {}; ··· 116 156 }); 117 157 in { 118 158 overlays.default = final: prev: { 119 - inherit (mkPackageSet final) lexgen goat sqlite-lib spindle knot-unwrapped knot appview docs dolly tap knotmirror bobbin; 159 + inherit (mkPackageSet final) lexgen goat sqlite-lib spindle shuttle knot-unwrapped knot appview docs dolly tap knotmirror bobbin; 120 160 }; 121 161 122 162 packages = forAllSystems (system: let 123 163 pkgs = nixpkgsFor.${system}; 164 + linuxPkgs = nixpkgsFor."x86_64-linux"; 124 165 packages = mkPackageSet pkgs; 125 166 staticPackages = mkPackageSet pkgs.pkgsStatic; 126 167 crossPackages = mkPackageSet pkgs.pkgsCross.gnu64.pkgsStatic; ··· 137 178 knot-unwrapped 138 179 sqlite-lib 139 180 docs 181 + shuttle 182 + shuttle-static 140 183 dolly 141 184 tap 142 185 knotmirror ··· 189 232 # }; 190 233 }; 191 234 }; 235 + 236 + spindle-nixos-image = linuxPkgs.callPackage ./nix/pkgs/spindle-nixos-image.nix { 237 + nixosSystem = self.nixosConfigurations.spindle-nixos; 238 + }; 239 + spindle-nixos-image-tarball = linuxPkgs.runCommand "spindle-nixos-image-tarball.tar.gz" {} '' 240 + tar -S -C ${self.packages.${system}.spindle-nixos-image} -h -czf $out . 
241 + ''; 242 + 243 + spindle-alpine-image = let 244 + branch = "3.24"; 245 + version = "${branch}.0"; 246 + arch = "x86_64"; 247 + cdn = "https://dl-cdn.alpinelinux.org/alpine/v${branch}/releases/${arch}"; 248 + 249 + shuttle = (mkPackageSet linuxPkgs).shuttle-static; 250 + in 251 + linuxPkgs.callPackage ./nix/pkgs/spindle-alpine-image.nix { 252 + inherit arch shuttle; 253 + repositories = [ 254 + "https://dl-cdn.alpinelinux.org/alpine/v${branch}/main" 255 + "https://dl-cdn.alpinelinux.org/alpine/v${branch}/community" 256 + ]; 257 + rootfs = linuxPkgs.fetchurl { 258 + url = "${cdn}/alpine-minirootfs-${version}-${arch}.tar.gz"; 259 + hash = "sha256-3poRwODn6clNs+2K97RQ6vwLE2h71+kZnVUFDyCqCok="; 260 + }; 261 + kernel = linuxPkgs.fetchurl { 262 + url = "${cdn}/netboot-${version}/vmlinuz-virt"; 263 + hash = "sha256-Hmv5Ancgx1w+0NeRcfIbV5HuQMqXldB8fG4E3F6irpA="; 264 + }; 265 + initramfs = linuxPkgs.fetchurl { 266 + url = "${cdn}/netboot-${version}/initramfs-virt"; 267 + hash = "sha256-ZCWGSaVMOYOmLz1Gwsf2RhYarMqk+tFVA6MMDWiHVJQ="; 268 + }; 269 + modloop = linuxPkgs.fetchurl { 270 + url = "${cdn}/netboot-${version}/modloop-virt"; 271 + hash = "sha256-p3yO7yU28k04iT01sOzhDmEYi+Yl7VZs5r3RYsWCBX0="; 272 + }; 273 + }; 274 + spindle-alpine-image-tarball = linuxPkgs.runCommand "spindle-alpine-image-tarball.tar.gz" {} '' 275 + tar -S -C ${self.packages.${system}.spindle-alpine-image} -h -czf $out . 
276 + ''; 192 277 }); 193 278 defaultPackage = forAllSystems (system: self.packages.${system}.appview); 194 279 devShells = forAllSystems (system: let ··· 219 304 pkgs.redis 220 305 pkgs.worker-build 221 306 pkgs.cargo-generate 307 + pkgs.qemu 308 + pkgs.cdrkit 309 + pkgs.parted 310 + pkgs.buf 311 + pkgs.protobuf 312 + pkgs.protoc-gen-prost 313 + pkgs.protoc-gen-prost-crate 314 + pkgs.protoc-gen-prost-serde 315 + pkgs.protoc-gen-go 222 316 (fenix.packages.${system}.combine [ 223 317 fenix.packages.${system}.stable.cargo 224 318 fenix.packages.${system}.stable.rustc ··· 231 325 packages'.lexgen 232 326 packages'.treefmt-wrapper 233 327 packages'.tap 328 + pkgs.e2fsprogs 329 + pkgs.slirp4netns 330 + pkgs.iproute2 331 + pkgs.util-linux 234 332 ]; 235 333 shellHook = '' 236 334 mkdir -p appview/pages/static ··· 306 404 exec ${pkgs.python3}/bin/python3 -m http.server 1414 307 405 ''); 308 406 }; 407 + regenerate-proto = { 408 + type = "app"; 409 + program = 410 + (pkgs.writeShellApplication { 411 + name = "regenerate-proto"; 412 + runtimeInputs = with pkgs; [git buf coreutils]; 413 + text = '' 414 + rootDir=$(git rev-parse --show-toplevel 2>/dev/null || pwd) 415 + cd "$rootDir" 416 + echo ">>> regenerating protobuf files.." 417 + buf generate 418 + echo ">>> generating file descriptor set for shuttle..." 419 + buf build -o shuttle/src/gen/file_descriptor_set.bin 420 + echo ">>> done" 421 + ''; 422 + }) 423 + + "/bin/regenerate-proto"; 424 + }; 309 425 vm = let 310 426 guestSystem = 311 427 if pkgs.stdenv.hostPlatform.isAarch64 ··· 409 525 410 526 services.tangled.spindle.package = lib.mkDefault self.packages.${pkgs.stdenv.hostPlatform.system}.spindle; 411 527 }; 528 + nixosModules.shuttle = { 529 + lib, 530 + pkgs, 531 + ... 
532 + }: { 533 + imports = [./nix/modules/shuttle.nix]; 534 + 535 + services.tangled.shuttle.package = lib.mkDefault self.packages.${pkgs.stdenv.hostPlatform.system}.shuttle; 536 + }; 537 + 538 + nixosModules.spindle-nixos = import ./nix/microvm/spindle-vm.nix {inherit self microvm;} ./nix/microvm/qemu.nix; 412 539 413 540 formatter = forAllSystems (system: self.packages.${system}.treefmt-wrapper); 541 + 542 + nixosConfigurations = let 543 + spindleNixosBase = nixpkgs.lib.nixosSystem { 544 + system = "x86_64-linux"; 545 + modules = [self.nixosModules.spindle-nixos]; 546 + }; 547 + in { 548 + spindle-nixos = spindleNixosBase; 549 + }; 414 550 }; 415 551 }
+28 -7
go.mod
··· 3 3 go 1.25.0 4 4 5 5 require ( 6 + buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go v1.36.11-20260415201107-50325440f8f2.1 7 + buf.build/go/protovalidate v1.2.0 6 8 github.com/Blank-Xu/sql-adapter v1.1.1 7 9 github.com/adrg/frontmatter v0.2.0 8 10 github.com/alecthomas/assert/v2 v2.11.0 ··· 27 29 github.com/charmbracelet/ssh v0.0.0-20250128164007-98fd5ae11894 28 30 github.com/charmbracelet/wish v1.4.7 29 31 github.com/cloudflare/cloudflare-go/v6 v6.7.0 32 + github.com/containerd/cgroups/v3 v3.1.3 30 33 github.com/cyphar/filepath-securejoin v0.4.1 31 34 github.com/dgraph-io/ristretto v0.2.0 32 35 github.com/did-method-plc/go-didplc v0.2.2 36 + github.com/digitalocean/go-qemu v0.0.0-20250212194115-ee9b0668d242 33 37 github.com/djherbis/buffer v1.2.0 34 38 github.com/djherbis/nio/v3 v3.0.1 35 39 github.com/docker/docker v28.2.2+incompatible ··· 49 53 github.com/jackc/pgx/v5 v5.8.0 50 54 github.com/landlock-lsm/go-landlock v0.8.1 51 55 github.com/mattn/go-sqlite3 v1.14.34 56 + github.com/mdlayher/vsock v1.3.0 57 + github.com/miekg/dns v1.1.72 52 58 github.com/microcosm-cc/bluemonday v1.0.27 53 59 github.com/multiformats/go-multihash v0.2.3 54 60 github.com/openbao/openbao/api/v2 v2.3.0 55 61 github.com/posthog/posthog-go v1.5.5 56 62 github.com/prometheus/client_golang v1.23.2 63 + github.com/prometheus/procfs v0.19.2 57 64 github.com/redis/go-redis/v9 v9.7.3 58 65 github.com/resend/resend-go/v3 v3.5.0 59 66 github.com/sethvargo/go-envconfig v1.1.0 ··· 67 74 github.com/yuin/goldmark-highlighting/v2 v2.0.0-20230729083705-37449abec8cc 68 75 gitlab.com/staticnoise/goldmark-callout v0.0.0-20240609120641-6366b799e4ab 69 76 go.abhg.dev/goldmark/mermaid v0.6.0 70 - golang.org/x/crypto v0.48.0 77 + golang.org/x/crypto v0.51.0 71 78 golang.org/x/image v0.31.0 72 - golang.org/x/net v0.50.0 73 - golang.org/x/sync v0.19.0 74 - golang.org/x/sys v0.41.0 79 + golang.org/x/net v0.55.0 80 + golang.org/x/sync v0.20.0 81 + golang.org/x/sys v0.45.0 75 82 
golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da 83 + google.golang.org/protobuf v1.36.11 76 84 gopkg.in/yaml.v3 v3.0.1 77 85 ) 78 86 79 87 require ( 88 + cel.dev/expr v0.25.1 // indirect 80 89 dario.cat/mergo v1.0.1 // indirect 81 90 github.com/BurntSushi/toml v0.3.1 // indirect 82 91 github.com/Microsoft/go-winio v0.6.2 // indirect ··· 85 94 github.com/RussellLuo/slidingwindow v0.0.0-20200528002341-535bb99d338b // indirect 86 95 github.com/alecthomas/repr v0.5.2 // indirect 87 96 github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be // indirect 97 + github.com/antlr4-go/antlr/v4 v4.13.1 // indirect 88 98 github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.7 // indirect 89 99 github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.20 // indirect 90 100 github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.20 // indirect ··· 134 144 github.com/charmbracelet/x/term v0.2.2 // indirect 135 145 github.com/charmbracelet/x/termios v0.1.0 // indirect 136 146 github.com/charmbracelet/x/windows v0.2.0 // indirect 147 + github.com/cilium/ebpf v0.16.0 // indirect 137 148 github.com/clipperhouse/displaywidth v0.9.0 // indirect 138 149 github.com/clipperhouse/stringish v0.1.1 // indirect 139 150 github.com/clipperhouse/uax29/v2 v2.5.0 // indirect ··· 141 152 github.com/containerd/errdefs v1.0.0 // indirect 142 153 github.com/containerd/errdefs/pkg v0.3.0 // indirect 143 154 github.com/containerd/log v0.1.0 // indirect 155 + github.com/coreos/go-systemd/v22 v22.5.0 // indirect 144 156 github.com/creack/pty v1.1.21 // indirect 145 157 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect 146 158 github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect 159 + github.com/digitalocean/go-libvirt v0.0.0-20220804181439-8648fbde413e // indirect 147 160 github.com/distribution/reference v0.6.0 // indirect 148 161 github.com/dlclark/regexp2 v1.11.5 // indirect 149 162 github.com/docker/go-connections v0.5.0 // indirect ··· 
163 176 github.com/go-redis/cache/v9 v9.0.0 // indirect 164 177 github.com/go-test/deep v1.1.1 // indirect 165 178 github.com/goccy/go-json v0.10.5 // indirect 179 + github.com/godbus/dbus/v5 v5.1.0 // indirect 166 180 github.com/gogo/protobuf v1.3.2 // indirect 167 181 github.com/golang-jwt/jwt v3.2.2+incompatible // indirect 168 182 github.com/golang-jwt/jwt/v5 v5.3.0 // indirect ··· 170 184 github.com/golang/mock v1.6.0 // indirect 171 185 github.com/golang/protobuf v1.5.4 // indirect 172 186 github.com/golang/snappy v0.0.4 // indirect 187 + github.com/google/cel-go v0.28.0 // indirect 173 188 github.com/google/go-querystring v1.1.0 // indirect 174 189 github.com/gorilla/css v1.0.1 // indirect 175 190 github.com/gorilla/securecookie v1.1.2 // indirect ··· 219 234 github.com/mattn/go-isatty v0.0.20 // indirect 220 235 github.com/mattn/go-localereader v0.0.1 // indirect 221 236 github.com/mattn/go-runewidth v0.0.19 // indirect 237 + github.com/mdlayher/socket v0.6.0 // indirect 222 238 github.com/minio/sha256-simd v1.0.1 // indirect 223 239 github.com/mitchellh/mapstructure v1.5.0 // indirect 224 240 github.com/moby/docker-image-spec v1.3.1 // indirect 225 241 github.com/moby/sys/atomicwriter v0.1.0 // indirect 242 + github.com/moby/sys/userns v0.1.0 // indirect 226 243 github.com/moby/term v0.5.2 // indirect 227 244 github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 228 245 github.com/modern-go/reflect2 v1.0.2 // indirect ··· 240 257 github.com/onsi/gomega v1.37.0 // indirect 241 258 github.com/opencontainers/go-digest v1.0.0 // indirect 242 259 github.com/opencontainers/image-spec v1.1.1 // indirect 260 + github.com/opencontainers/runtime-spec v1.3.0 // indirect 243 261 github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b // indirect 244 262 github.com/pjbgf/sha1cd v0.3.2 // indirect 245 263 github.com/pkg/errors v0.9.1 // indirect ··· 247 265 github.com/polydawn/refmt v0.89.1-0.20221221234430-40501e09de1f // 
indirect 248 266 github.com/prometheus/client_model v0.6.2 // indirect 249 267 github.com/prometheus/common v0.67.5 // indirect 250 - github.com/prometheus/procfs v0.19.2 // indirect 251 268 github.com/puzpuzpuz/xsync/v4 v4.2.0 // indirect 252 269 github.com/rivo/uniseg v0.4.7 // indirect 253 270 github.com/ryanuber/go-glob v1.0.0 // indirect 254 271 github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect 272 + github.com/sirupsen/logrus v1.9.3 // indirect 255 273 github.com/spaolacci/murmur3 v1.1.0 // indirect 256 274 github.com/tidwall/gjson v1.18.0 // indirect 257 275 github.com/tidwall/match v1.2.0 // indirect ··· 276 294 go.uber.org/zap v1.27.1 // indirect 277 295 go.yaml.in/yaml/v2 v2.4.3 // indirect 278 296 golang.org/x/exp v0.0.0-20260112195511-716be5621a96 // indirect 279 - golang.org/x/text v0.34.0 // indirect 297 + golang.org/x/mod v0.35.0 // indirect 298 + golang.org/x/text v0.37.0 // indirect 280 299 golang.org/x/time v0.12.0 // indirect 281 - google.golang.org/protobuf v1.36.11 // indirect 300 + golang.org/x/tools v0.44.0 // indirect 301 + google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57 // indirect 302 + google.golang.org/genproto/googleapis/rpc v0.0.0-20260209200024-4cfbd4190f57 // indirect 282 303 gopkg.in/fsnotify.v1 v1.4.7 // indirect 283 304 gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect 284 305 gopkg.in/warnings.v0 v0.1.2 // indirect
+64 -12
go.sum
··· 1 + buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go v1.36.11-20260415201107-50325440f8f2.1 h1:s6hzCXtND/ICdGPTMGk7C+/BFlr2Jg5GyH0NKf4XGXg= 2 + buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go v1.36.11-20260415201107-50325440f8f2.1/go.mod h1:tvtbpgaVXZX4g6Pn+AnzFycuRK3MOz5HJfEGeEllXYM= 3 + buf.build/go/protovalidate v1.2.0 h1:DQVrUWkmGTBij+kOYv/x2LLxwcLaGKMdzShj1/6/3H0= 4 + buf.build/go/protovalidate v1.2.0/go.mod h1:7rYiQEhqvAipoazpVNBBH2S2f8bjG4huMVy1V2Yofn4= 5 + cel.dev/expr v0.25.1 h1:1KrZg61W6TWSxuNZ37Xy49ps13NUovb66QLprthtwi4= 6 + cel.dev/expr v0.25.1/go.mod h1:hrXvqGP6G6gyx8UAHSHJ5RGk//1Oj5nXQ2NI02Nrsg4= 1 7 dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s= 2 8 dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= 3 9 github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c h1:udKWzYgxTojEKWjV8V+WSxDXJ4NFATAsZjh8iIbsQIg= ··· 23 29 github.com/alecthomas/repr v0.5.2/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4= 24 30 github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8= 25 31 github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4= 32 + github.com/antlr4-go/antlr/v4 v4.13.1 h1:SqQKkuVZ+zWkMMNkjy5FZe5mr5WURWnlpmOuzYWrPrQ= 33 + github.com/antlr4-go/antlr/v4 v4.13.1/go.mod h1:GKmUxMtwp6ZgGwZSva4eWPC5mS6vUAmOABFgjdkM7Nw= 26 34 github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= 27 35 github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= 28 36 github.com/avast/retry-go/v4 v4.6.1 h1:VkOLRubHdisGrHnTu89g08aQEWEgRU7LVEop3GbIcMk= ··· 120 128 github.com/bmatcuk/doublestar/v4 v4.9.1/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc= 121 129 github.com/boltlessengineer/indigo v0.0.0-20260315101958-fb1dfa36fed2 
h1:63+EsT7kltod8g1eA0eNuvq1q9ANJWRdxlLeJjJDVYY= 122 130 github.com/boltlessengineer/indigo v0.0.0-20260315101958-fb1dfa36fed2/go.mod h1:VG/LeqLGNI3Ew7lsYixajnZGFfWPv144qbUddh+Oyag= 131 + github.com/brianvoe/gofakeit/v6 v6.28.0 h1:Xib46XXuQfmlLS2EXRuJpqcw8St6qSZz75OUo0tgAW4= 132 + github.com/brianvoe/gofakeit/v6 v6.28.0/go.mod h1:Xj58BMSnFqcn/fAQeSK+/PLtC5kSb7FJIq4JyGa8vEs= 123 133 github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs= 124 134 github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c= 125 135 github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA= ··· 183 193 github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= 184 194 github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= 185 195 github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= 196 + github.com/cilium/ebpf v0.16.0 h1:+BiEnHL6Z7lXnlGUsXQPPAE7+kenAd4ES8MQ5min0Ok= 197 + github.com/cilium/ebpf v0.16.0/go.mod h1:L7u2Blt2jMM/vLAVgjxluxtBKlz3/GWjB0dMOEngfwE= 186 198 github.com/clipperhouse/displaywidth v0.9.0 h1:Qb4KOhYwRiN3viMv1v/3cTBlz3AcAZX3+y9OLhMtAtA= 187 199 github.com/clipperhouse/displaywidth v0.9.0/go.mod h1:aCAAqTlh4GIVkhQnJpbL0T/WfcrJXHcj8C0yjYcjOZA= 188 200 github.com/clipperhouse/stringish v0.1.1 h1:+NSqMOr3GR6k1FdRhhnXrLfztGzuG+VuFDfatpWHKCs= ··· 193 205 github.com/cloudflare/circl v1.6.2-0.20250618153321-aa837fd1539d/go.mod h1:uddAzsPgqdMAYatqJ0lsjX1oECcQLIlRpzZh3pJrofs= 194 206 github.com/cloudflare/cloudflare-go/v6 v6.7.0 h1:MP6Xy5WmsyrxgTxoLeq/vraqR0nbTtXoHhW4vAYc4SY= 195 207 github.com/cloudflare/cloudflare-go/v6 v6.7.0/go.mod h1:Lj3MUqjvKctXRpdRhLQxZYRrNZHuRs0XYuH8JtQGyoI= 208 + github.com/containerd/cgroups/v3 v3.1.3 h1:eUNflyMddm18+yrDmZPn3jI7C5hJ9ahABE5q6dyLYXQ= 209 + github.com/containerd/cgroups/v3 v3.1.3/go.mod 
h1:PKZ2AcWmSBsY/tJUVhtS/rluX0b1uq1GmPO1ElCmbOw= 196 210 github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI= 197 211 github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M= 198 212 github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE= 199 213 github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk= 200 214 github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= 201 215 github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= 216 + github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= 217 + github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= 202 218 github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= 203 219 github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= 204 220 github.com/creack/pty v1.1.21 h1:1/QdRyBaHHJP61QkWMXlOIBfsgdDeeKfK8SYVUWJKf0= ··· 221 237 github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= 222 238 github.com/did-method-plc/go-didplc v0.2.2 h1:53HFhTT8NCAeFmZ6fdIZCf3PGDvj7A3cDjzOOEqn5XM= 223 239 github.com/did-method-plc/go-didplc v0.2.2/go.mod h1:bKdJ21irnwNHgVLWWL32zUWqZueXYbJRUcxplZghByo= 240 + github.com/digitalocean/go-libvirt v0.0.0-20220804181439-8648fbde413e h1:SCnqm8SjSa0QqRxXbo5YY//S+OryeJioe17nK+iDZpg= 241 + github.com/digitalocean/go-libvirt v0.0.0-20220804181439-8648fbde413e/go.mod h1:o129ljs6alsIQTc8d6eweihqpmmrbxZ2g1jhgjhPykI= 242 + github.com/digitalocean/go-qemu v0.0.0-20250212194115-ee9b0668d242 h1:rh6rt8pF5U4iyQ86h6lRDenJoX4ht2wFnZXB9ogIrIM= 243 + github.com/digitalocean/go-qemu v0.0.0-20250212194115-ee9b0668d242/go.mod h1:LGHUtlhsY4vRGM6AHejEQKVI5e3eHbSylMHwTSpQtVw= 224 244 
github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk= 225 245 github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= 226 246 github.com/djherbis/buffer v1.1.0/go.mod h1:VwN8VdFkMY0DCALdY8o00d3IZ6Amz/UNVMWcSaJT44o= ··· 285 305 github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= 286 306 github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= 287 307 github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= 308 + github.com/go-quicktest/qt v1.101.0 h1:O1K29Txy5P2OK0dGo59b7b0LR6wKfIhttaAhHUyn7eI= 309 + github.com/go-quicktest/qt v1.101.0/go.mod h1:14Bz/f7NwaXPtdYEgzsx46kqSxVwTbzVZsDC26tQJow= 288 310 github.com/go-redis/cache/v9 v9.0.0 h1:0thdtFo0xJi0/WXbRVu8B066z8OvVymXTJGaXrVWnN0= 289 311 github.com/go-redis/cache/v9 v9.0.0/go.mod h1:cMwi1N8ASBOufbIvk7cdXe2PbPjK/WMRL95FFHWsSgI= 290 312 github.com/go-redis/redis v6.15.9+incompatible h1:K0pv1D7EQUjfyoMql+r/jZqCLizCGKFlFgcHWWmHQjg= ··· 301 323 github.com/gobwas/ws v1.4.0/go.mod h1:G3gNqMNtPppf5XUz7O4shetPpcZ1VJ7zt18dlUeakrc= 302 324 github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4= 303 325 github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= 326 + github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= 327 + github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk= 328 + github.com/godbus/dbus/v5 v5.1.0/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= 304 329 github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= 305 330 github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= 306 331 github.com/golang-jwt/jwt v3.2.2+incompatible h1:IfV12K8xAKAnZqdXVzCZ+TOjboZ2keLg81eXfW3O+oY= ··· 325 350 github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= 326 351 
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= 327 352 github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= 353 + github.com/google/cel-go v0.28.0 h1:KjSWstCpz/MN5t4a8gnGJNIYUsJRpdi/r97xWDphIQc= 354 + github.com/google/cel-go v0.28.0/go.mod h1:X0bD6iVNR8pkROSOoHVdgTkzmRcosof7WQqCD6wcMc8= 328 355 github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 329 356 github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 330 357 github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= ··· 465 492 github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc= 466 493 github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ= 467 494 github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= 495 + github.com/josharian/native v1.1.0 h1:uuaP0hAbW7Y4l0ZRQ6C9zfb7Mg1mbFKry/xzDAfmtLA= 496 + github.com/josharian/native v1.1.0/go.mod h1:7X/raswPFr05uY3HiLlYeyQntB6OO7E/d2Cu7qoaN2w= 497 + github.com/jsimonetti/rtnetlink/v2 v2.0.1 h1:xda7qaHDSVOsADNouv7ukSuicKZO7GgVUCXxpaIEIlM= 498 + github.com/jsimonetti/rtnetlink/v2 v2.0.1/go.mod h1:7MoNYNbb3UaDHtF8udiJo/RH6VsTKP1pqKLUTVCvToE= 468 499 github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= 469 500 github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= 470 501 github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= ··· 522 553 github.com/mattn/go-runewidth v0.0.19/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs= 523 554 github.com/mattn/go-sqlite3 v1.14.34 h1:3NtcvcUnFBPsuRcno8pUtupspG/GM+9nZ88zgJcp6Zk= 524 555 github.com/mattn/go-sqlite3 v1.14.34/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= 556 + github.com/mdlayher/netlink v1.7.2 h1:/UtM3ofJap7Vl4QWCPDGXY8d3GIY2UGSDbK+QWmY8/g= 557 + 
github.com/mdlayher/netlink v1.7.2/go.mod h1:xraEF7uJbxLhc5fpHL4cPe221LI2bdttWlU+ZGLfQSw= 558 + github.com/mdlayher/socket v0.6.0 h1:ScZPaAGyO1icQnbFrhPM8mnXyMu9qukC1K4ZoM2IQKU= 559 + github.com/mdlayher/socket v0.6.0/go.mod h1:q7vozUAnxSqnjHc12Fik5yUKIzfZ8ITCfMkhOtE9z18= 560 + github.com/mdlayher/vsock v1.3.0 h1:bqQfZ1OznI03y6YiXp2sze05RVdzLn/zsfjnjd4+ivI= 561 + github.com/mdlayher/vsock v1.3.0/go.mod h1:WsuksavOvwCnV5UqGHUkvAvCy+Dqy81y4goKQTzxxNY= 525 562 github.com/microcosm-cc/bluemonday v1.0.27 h1:MpEUotklkwCSLeH+Qdx1VJgNqLlpY2KXwXFM08ygZfk= 526 563 github.com/microcosm-cc/bluemonday v1.0.27/go.mod h1:jFi9vgW+H7c3V0lb6nR74Ib/DIB5OBs92Dimizgw2cA= 564 + github.com/miekg/dns v1.1.72 h1:vhmr+TF2A3tuoGNkLDFK9zi36F2LS+hKTRW0Uf8kbzI= 565 + github.com/miekg/dns v1.1.72/go.mod h1:+EuEPhdHOsfk6Wk5TT2CzssZdqkmFhf8r+aVyDEToIs= 527 566 github.com/minio/sha256-simd v1.0.1 h1:6kaan5IFmwTNynnKKpDHe6FWHohJOHhCPchzK49dzMM= 528 567 github.com/minio/sha256-simd v1.0.1/go.mod h1:Pz6AKMiUdngCLpeTL/RJY1M9rUuPMYujV5xJjtbRSN8= 529 568 github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= ··· 534 573 github.com/moby/sys/atomicwriter v0.1.0/go.mod h1:Ul8oqv2ZMNHOceF643P6FKPXeCmYtlQMvpizfsSoaWs= 535 574 github.com/moby/sys/sequential v0.6.0 h1:qrx7XFUd/5DxtqcoH1h438hF5TmOvzC/lspjy7zgvCU= 536 575 github.com/moby/sys/sequential v0.6.0/go.mod h1:uyv8EUTrca5PnDsdMGXhZe6CCe8U/UiTWd+lL+7b/Ko= 576 + github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g= 577 + github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28= 537 578 github.com/moby/term v0.5.2 h1:6qk3FJAFDs6i/q3W/pQ97SX192qKfZgGjCQqfCJkgzQ= 538 579 github.com/moby/term v0.5.2/go.mod h1:d3djjFCrjnB+fl8NJux+EJzu0msscUP+f8it8hPkFLc= 539 580 github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= ··· 606 647 github.com/opencontainers/go-digest v1.0.0/go.mod 
h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= 607 648 github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040= 608 649 github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M= 650 + github.com/opencontainers/runtime-spec v1.3.0 h1:YZupQUdctfhpZy3TM39nN9Ika5CBWT5diQ8ibYCRkxg= 651 + github.com/opencontainers/runtime-spec v1.3.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= 609 652 github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= 610 653 github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b h1:FfH+VrHHk6Lxt9HdVS0PXzSXFyS2NbZKXv33FYPol0A= 611 654 github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b/go.mod h1:AC62GU6hc0BrNm+9RK9VSiwa/EUe1bkIeFORAMcHvJU= ··· 642 685 github.com/resend/resend-go/v3 v3.5.0/go.mod h1:iI7VA0NoGjWvsNii5iNC5Dy0llsI3HncXPejhniYzwE= 643 686 github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= 644 687 github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= 688 + github.com/rodaine/protogofakeit v0.1.1 h1:ZKouljuRM3A+TArppfBqnH8tGZHOwM/pjvtXe9DaXH8= 689 + github.com/rodaine/protogofakeit v0.1.1/go.mod h1:pXn/AstBYMaSfc1/RqH3N82pBuxtWgejz1AlYpY1mI0= 645 690 github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= 646 691 github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= 647 692 github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= ··· 770 815 go.uber.org/zap v1.27.1/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= 771 816 go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0= 772 817 go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8= 818 + go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= 819 + go.yaml.in/yaml/v3 v3.0.4/go.mod 
h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= 773 820 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 774 821 golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 775 822 golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= ··· 777 824 golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= 778 825 golang.org/x/crypto v0.1.0/go.mod h1:RecgLatLF4+eUMCP1PoPZQb+cVrJcOPbHkTkbkB9sbw= 779 826 golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= 780 - golang.org/x/crypto v0.48.0 h1:/VRzVqiRSggnhY7gNRxPauEQ5Drw9haKdM0jqfcCFts= 781 - golang.org/x/crypto v0.48.0/go.mod h1:r0kV5h3qnFPlQnBSrULhlsRfryS2pmewsg+XfMgkVos= 827 + golang.org/x/crypto v0.51.0 h1:IBPXwPfKxY7cWQZ38ZCIRPI50YLeevDLlLnyC5wRGTI= 828 + golang.org/x/crypto v0.51.0/go.mod h1:8AdwkbraGNABw2kOX6YFPs3WM22XqI4EXEd8g+x7Oc8= 782 829 golang.org/x/exp v0.0.0-20260112195511-716be5621a96 h1:Z/6YuSHTLOHfNFdb8zVZomZr7cqNgTJvA8+Qz75D8gU= 783 830 golang.org/x/exp v0.0.0-20260112195511-716be5621a96/go.mod h1:nzimsREAkjBCIEFtHiYkrJyT+2uy9YZJB7H1k68CXZU= 784 831 golang.org/x/image v0.31.0 h1:mLChjE2MV6g1S7oqbXC0/UcKijjm5fnJLUYKIYrLESA= ··· 793 840 golang.org/x/mod v0.6.0/go.mod h1:4mET923SAdbXp2ki8ey+zGs1SLqsuM2Y0uvdZR/fUNI= 794 841 golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= 795 842 golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= 843 + golang.org/x/mod v0.35.0 h1:Ww1D637e6Pg+Zb2KrWfHQUnH2dQRLBQyAtpr/haaJeM= 844 + golang.org/x/mod v0.35.0/go.mod h1:+GwiRhIInF8wPm+4AoT6L0FA1QWAad3OMdTRx4tFYlU= 796 845 golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 797 846 golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod 
h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 798 847 golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= ··· 813 862 golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws= 814 863 golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= 815 864 golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= 816 - golang.org/x/net v0.50.0 h1:ucWh9eiCGyDR3vtzso0WMQinm2Dnt8cFMuQa9K33J60= 817 - golang.org/x/net v0.50.0/go.mod h1:UgoSli3F/pBgdJBHCTc+tp3gmrU4XswgGRgtnwWTfyM= 865 + golang.org/x/net v0.55.0 h1:bcvxaJn3e1U6InsFWt1JUq1aSjnRxLzT2rtD2KfkDF8= 866 + golang.org/x/net v0.55.0/go.mod h1:L5U2KuzuOe1lY7Z+aWVIKK6qEeJXnXV9yzGA+WCHJww= 818 867 golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 819 868 golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 820 869 golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= ··· 822 871 golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 823 872 golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 824 873 golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 825 - golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= 826 - golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= 874 + golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= 875 + golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= 827 876 golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 828 877 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 829 
878 golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= ··· 845 894 golang.org/x/sys v0.0.0-20220319134239-a9b59b0215f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 846 895 golang.org/x/sys v0.0.0-20220422013727-9388b58f7150/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 847 896 golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 897 + golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 848 898 golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 849 899 golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 850 900 golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= ··· 855 905 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 856 906 golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 857 907 golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 858 - golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k= 859 - golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= 908 + golang.org/x/sys v0.45.0 h1:dO4czNzziLiiXplLQgBCEpCvXQ3dnkn0SdaZSYdQ+FY= 909 + golang.org/x/sys v0.45.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= 860 910 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= 861 911 golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= 862 912 golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= ··· 866 916 golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= 867 917 golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= 868 918 
golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= 869 - golang.org/x/term v0.40.0 h1:36e4zGLqU4yhjlmxEaagx2KuYbJq3EwY8K943ZsHcvg= 870 - golang.org/x/term v0.40.0/go.mod h1:w2P8uVp06p2iyKKuvXIm7N/y0UCRt3UfJTfZ7oOpglM= 919 + golang.org/x/term v0.43.0 h1:S4RLU2sB31O/NCl+zFN9Aru9A/Cq2aqKpTZJ6B+DwT4= 920 + golang.org/x/term v0.43.0/go.mod h1:lrhlHNdQJHO+1qVYiHfFKVuVioJIheAc3fBSMFYEIsk= 871 921 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 872 922 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 873 923 golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= ··· 878 928 golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= 879 929 golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= 880 930 golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= 881 - golang.org/x/text v0.34.0 h1:oL/Qq0Kdaqxa1KbNeMKwQq0reLCCaFtqu2eNuSeNHbk= 882 - golang.org/x/text v0.34.0/go.mod h1:homfLqTYRFyVYemLBFl5GgL/DWEiH5wcsQ5gSh1yziA= 931 + golang.org/x/text v0.37.0 h1:Cqjiwd9eSg8e0QAkyCaQTNHFIIzWtidPahFWR83rTrc= 932 + golang.org/x/text v0.37.0/go.mod h1:a5sjxXGs9hsn/AJVwuElvCAo9v8QYLzvavO5z2PiM38= 883 933 golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= 884 934 golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= 885 935 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= ··· 899 949 golang.org/x/tools v0.2.0/go.mod h1:y4OqIKeOV/fWJetJ8bXPU1sEVniLMIyDAZWeHdV+NTA= 900 950 golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ= 901 951 golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= 952 + golang.org/x/tools v0.44.0 h1:UP4ajHPIcuMjT1GqzDWRlalUEoY+uzoZKnhOjbIPD2c= 953 + golang.org/x/tools v0.44.0/go.mod 
h1:KA0AfVErSdxRZIsOVipbv3rQhVXTnlU6UhKxHd1seDI= 902 954 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 903 955 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 904 956 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+24
knotmirror/hostutil/hostutil.go hostutil/hostutil.go
··· 54 54 // lower-case in response 55 55 return h.Normalize().String(), noSSL, nil 56 56 } 57 + 58 + func EnsureHttpScheme(host string) (string, error) { 59 + hostname, noSSL, err := ParseHostname(host) 60 + if err != nil { 61 + return "", err 62 + } 63 + if noSSL { 64 + return "http://" + hostname, nil 65 + } else { 66 + return "https://" + hostname, nil 67 + } 68 + } 69 + 70 + func EnsureWsScheme(host string) (string, error) { 71 + hostname, noSSL, err := ParseHostname(host) 72 + if err != nil { 73 + return "", err 74 + } 75 + if noSSL { 76 + return "ws://" + hostname, nil 77 + } else { 78 + return "wss://" + hostname, nil 79 + } 80 + }
+1 -1
knotmirror/xrpc/sync_request_crawl.go
··· 11 11 "github.com/bluesky-social/indigo/atproto/syntax" 12 12 "github.com/bluesky-social/indigo/xrpc" 13 13 "tangled.org/core/api/tangled" 14 + "tangled.org/core/hostutil" 14 15 "tangled.org/core/knotmirror/db" 15 - "tangled.org/core/knotmirror/hostutil" 16 16 "tangled.org/core/knotmirror/models" 17 17 ) 18 18
+10
localinfra/Caddyfile
··· 37 37 reverse_proxy knot:5555 38 38 } 39 39 40 + # spindle 41 + http://spindle.tngl.boltless.dev { 42 + reverse_proxy spindle:6555 43 + } 44 + 45 + spindle.tngl.boltless.dev { 46 + tls internal 47 + reverse_proxy spindle:6555 48 + } 49 + 40 50 # knotmirror 41 51 mirror.tngl.boltless.dev { 42 52 tls internal
+4
localinfra/appview.Dockerfile
··· 41 41 [ -r /shared/label-defaults ] && export TANGLED_LABEL_DEFAULTS="$(cat /shared/label-defaults)" 42 42 [ -r /shared/label-gfi ] && export TANGLED_LABEL_GFI="$(cat /shared/label-gfi)" 43 43 44 + if [ -f /usr/local/share/ca-certificates/caddy.crt ]; then 45 + update-ca-certificates 46 + fi 47 + 44 48 exec air -c /src/.air/appview.toml 45 49 EOF 46 50 RUN chmod +x /usr/local/bin/appview-entrypoint.sh
+4
localinfra/knot.Dockerfile
··· 84 84 [ -f /etc/ssh/keys/ssh_host_ecdsa_key ] || ssh-keygen -t ecdsa -f /etc/ssh/keys/ssh_host_ecdsa_key -q -N "" 85 85 [ -f /etc/ssh/keys/ssh_host_ed25519_key ] || ssh-keygen -t ed25519 -f /etc/ssh/keys/ssh_host_ed25519_key -q -N "" 86 86 87 + if [ -f /usr/local/share/ca-certificates/caddy.crt ]; then 88 + update-ca-certificates 89 + fi 90 + 87 91 /usr/sbin/sshd -D -e & 88 92 exec su-exec git /usr/local/bin/knot server 89 93 EOF
+1 -4
localinfra/knotmirror.Dockerfile
··· 14 14 15 15 RUN apk add --no-cache git tini ca-certificates 16 16 17 - # Trust dev CA in the system bundle so git/curl/openssl all accept caddy certs. 18 - COPY localinfra/certs/root.crt /usr/local/share/ca-certificates/caddy.crt 19 - RUN update-ca-certificates 20 17 21 18 COPY --from=build /knotmirror /usr/local/bin/knotmirror 22 19 23 20 EXPOSE 7000 24 21 25 22 ENTRYPOINT ["/sbin/tini", "--"] 26 - CMD ["/usr/local/bin/knotmirror", "serve"] 23 + CMD ["sh", "-c", "if [ -f /usr/local/share/ca-certificates/caddy.crt ]; then update-ca-certificates; fi && exec /usr/local/bin/knotmirror serve"]
+8 -5
localinfra/readme.md
··· 14 14 - atproto_pds (<https://pds.tngl.boltless.dev>) 15 15 - jetstream (<https://jetstream.tngl.boltless.dev>) 16 16 - knot (<https://knot.tngl.boltless.dev>) 17 + - spindle (<https://spindle.tngl.boltless.dev>) 17 18 - knotmirror (<https://knotmirror.tngl.boltless.dev>) 18 19 - appview (<https://tngl.boltless.dev>) (live reloading) 19 20 - caddy reverse proxy 20 - 21 - > [!NOTE] 22 - > Spindle is not included yet. 23 21 24 22 ## Setup 25 23 ··· 44 42 ``` 45 43 - Depending on your browser, you may have to import the certificate into your browser profiles too, as some browsers have their own certificate stores and do not use the system one 46 44 3. run `./localinfra/scripts/appview-static-files.sh` 47 - 4. `docker compose up` 48 - 5. AppView will be running on `127.0.0.1:3000` with two test users: `alice.pds.tngl.boltless.dev` and `bob.pds.tngl.boltless.dev`. Both with password `password`. 45 + 4. Prepare the spindle microVM images: 46 + ```bash 47 + ./localinfra/scripts/prepare-spindle-images.sh 48 + ``` 49 + This writes the image directory under `out/localinfra-spindle-images`. 50 + 5. `docker compose up` 51 + 6. AppView will be running on `127.0.0.1:3000` with two test users: `alice.pds.tngl.boltless.dev` and `bob.pds.tngl.boltless.dev`. Both with password `password`.
+1
localinfra/scripts/init-accounts.sh
··· 187 187 JWT=$(login "$OWNER_DID") 188 188 189 189 put_record "$JWT" "$OWNER_DID" "sh.tangled.knot" $KNOT_HOSTNAME "{\"createdAt\": \"${CREATED_AT}\"}" 190 + put_record "$JWT" "$OWNER_DID" "sh.tangled.spindle" "$SPINDLE_HOSTNAME" "{\"createdAt\": \"${CREATED_AT}\"}" 190 191 191 192 printf 'done.\n' >&2
+32
localinfra/scripts/prepare-spindle-images.sh
#!/usr/bin/env bash
# Build the spindle microVM image tarballs with nix and unpack them into an
# image directory.
#
# Usage: prepare-spindle-images.sh [IMAGE_ROOT]
#   IMAGE_ROOT defaults to <repo>/out/localinfra-spindle-images, where <repo>
#   is two directories above this script.
set -euo pipefail

repo=$(cd "$(dirname "$0")/../.." && pwd)
image_root="${1:-$repo/out/localinfra-spindle-images}"

mkdir -p "$image_root"

# extract_image PACKAGE NAME [ALIAS...]
#
# Builds flake output PACKAGE from the repo, unpacks the resulting gzip tarball
# into $image_root/NAME (discarding whatever was there before), then makes each
# ALIAS a relative symlink to NAME inside $image_root.
extract_image() {
  local flake_attr="$1"
  local image_name="$2"
  shift 2

  local image_dir="$image_root/$image_name"

  # Build first: if nix fails, set -e aborts before the old image is removed.
  local archive
  archive=$(nix build "$repo#$flake_attr" --no-link --print-out-paths)

  # rm cannot delete entries inside read-only directories, so restore write
  # permission on a previous extraction first (best effort).
  if [ -d "$image_dir" ]; then
    chmod -R +w "$image_dir" || true
  fi
  rm -rf "$image_dir"
  mkdir -p "$image_dir"
  tar -C "$image_dir" -xzf "$archive"

  local link_name
  for link_name in "$@"; do
    rm -rf "$image_root/$link_name"
    ln -s "$image_name" "$image_root/$link_name"
  done
}

extract_image spindle-nixos-image-tarball nixos-x86_64 nixos
extract_image spindle-alpine-image-tarball alpine-x86_64 alpine

echo "prepared spindle microVM images in $image_root"
+66
localinfra/spindle.Dockerfile
# Development only. Not for production use.

# --- build stage: compile spindle and the microVM runner with cgo + sqlite ---
FROM golang:1.25-alpine AS builder

RUN apk add --no-cache git build-base sqlite-dev

ENV CGO_ENABLED=1
ENV GOCACHE=/go/cache
ENV GOMODCACHE=/go/mod

WORKDIR /src

# Download modules before copying the full tree so this layer is reused as
# long as go.mod / go.sum are unchanged.
COPY go.mod go.sum ./
RUN --mount=type=cache,target=/go/cache \
    --mount=type=cache,target=/go/mod \
    go mod download

COPY . .
RUN --mount=type=cache,target=/go/cache \
    --mount=type=cache,target=/go/mod \
    go build -tags libsqlite3 -o /out/spindle ./cmd/spindle && \
    go build -tags libsqlite3 -o /out/spindle-microvm-run ./cmd/spindle-microvm-run

# --- runtime stage ---
# NOTE(review): the builder's Alpine release is whatever golang:1.25-alpine
# ships, while this stage pins 3.20; confirm the libsqlite3 the binaries link
# against is compatible with sqlite-libs here.
FROM alpine:3.20

RUN apk add --no-cache \
    bash \
    ca-certificates \
    e2fsprogs \
    git \
    iproute2 \
    qemu-system-x86_64 \
    shadow \
    slirp4netns \
    sqlite-libs \
    tini \
    util-linux

# --chmod sets the mode at copy time, so no follow-up `RUN chmod` layer is
# needed.
COPY --from=builder --chmod=0755 /out/spindle /usr/local/bin/spindle
COPY --from=builder --chmod=0755 /out/spindle-microvm-run /usr/local/bin/spindle-microvm-run

# Entrypoint: resolve the owner DID, create state directories, optionally
# refresh the CA bundle, then exec spindle.
COPY --chmod=0755 <<'EOF' /usr/local/bin/spindle-entrypoint.sh
#!/bin/sh
set -eu

# Fall back to /shared/owner-did when the owner is not set in the environment;
# abort with a message if it is still unset afterwards.
[ -z "${SPINDLE_SERVER_OWNER:-}" ] && [ -r /shared/owner-did ] && \
    export SPINDLE_SERVER_OWNER="$(cat /shared/owner-did)"
: "${SPINDLE_SERVER_OWNER:?set via env or /shared/owner-did}"

mkdir -p /var/lib/spindle /var/lib/spindle/overlays /var/log/spindle

# Only rebuild the CA bundle when a caddy certificate is present at this path.
if [ -f /usr/local/share/ca-certificates/caddy.crt ]; then
    update-ca-certificates
fi

exec /usr/local/bin/spindle run
EOF

VOLUME /var/lib/spindle
EXPOSE 6555

ENTRYPOINT ["/sbin/tini", "--"]
CMD ["/usr/local/bin/spindle-entrypoint.sh"]
+73 -10
nix/gomod2nix.toml
··· 1 1 schema = 3 2 2 3 3 [mod] 4 + [mod."buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go"] 5 + version = "v1.36.11-20260415201107-50325440f8f2.1" 6 + hash = "sha256-oyhP92KT9a++f5riA9sI1myID7MR39AzHuT3cKx5hgg=" 7 + [mod."buf.build/go/protovalidate"] 8 + version = "v1.2.0" 9 + hash = "sha256-4+42DSj7LAdJyPPxif3b5QRH5MCjedudgylTHBibZlQ=" 10 + [mod."cel.dev/expr"] 11 + version = "v0.25.1" 12 + hash = "sha256-TEdMxFUPK7IZuCXMufwCkbN+ZZIXSQclljIybFZcByo=" 4 13 [mod."dario.cat/mergo"] 5 14 version = "v1.0.1" 6 15 hash = "sha256-wcG6+x0k6KzOSlaPA+1RFxa06/RIAePJTAjjuhLbImw=" ··· 38 47 [mod."github.com/anmitsu/go-shlex"] 39 48 version = "v0.0.0-20200514113438-38f4b401e2be" 40 49 hash = "sha256-L3Ak4X2z7WXq7vMKuiHCOJ29nlpajUQ08Sfb9T0yP54=" 50 + [mod."github.com/antlr4-go/antlr/v4"] 51 + version = "v4.13.1" 52 + hash = "sha256-beAuxHNRUuhzcSJUh/8ztVf1zCUiaT72fg2Jvx0AuNQ=" 41 53 [mod."github.com/avast/retry-go/v4"] 42 54 version = "v4.6.1" 43 55 hash = "sha256-PeZc8k4rDV64+k8nZt/oy1YNVbLevltXP3ZD1jf6Z6k=" ··· 244 256 [mod."github.com/charmbracelet/x/windows"] 245 257 version = "v0.2.0" 246 258 hash = "sha256-pDAd1E5w66E/d3vuTyzgnW+W/KegZ2sxQQMfoEn7S1A=" 259 + [mod."github.com/cilium/ebpf"] 260 + version = "v0.16.0" 261 + hash = "sha256-xACuieGmiUUjoTT/9MpvPBNexp98S/AZbLxm5f9nqDk=" 247 262 [mod."github.com/clipperhouse/displaywidth"] 248 263 version = "v0.9.0" 249 264 hash = "sha256-9CNyTZPSncKQ7Y0my9DR4WYXDjtDHYNL512D691WDAM=" ··· 259 274 [mod."github.com/cloudflare/cloudflare-go/v6"] 260 275 version = "v6.7.0" 261 276 hash = "sha256-ycQpx1II/JgBgrCRwY5qiVKStGv5wuCANy1091sJ5Zw=" 277 + [mod."github.com/containerd/cgroups/v3"] 278 + version = "v3.1.3" 279 + hash = "sha256-1a5heWXIzME7iMu2L35OBiAOi2Z/gnpg2fjvP6On9sM=" 262 280 [mod."github.com/containerd/errdefs"] 263 281 version = "v1.0.0" 264 282 hash = "sha256-wMZGoeqvRhuovYCJx0Js4P3qFCNTZ/6Atea/kNYoPMI=" ··· 268 286 [mod."github.com/containerd/log"] 269 287 version = "v0.1.0" 270 288 hash = 
"sha256-vuE6Mie2gSxiN3jTKTZovjcbdBd1YEExb7IBe3GM+9s=" 289 + [mod."github.com/coreos/go-systemd/v22"] 290 + version = "v22.5.0" 291 + hash = "sha256-E2zXikbmIQImghstLUWuey1YgA0Folu3F+fi5k4hCxA=" 271 292 [mod."github.com/creack/pty"] 272 293 version = "v1.1.21" 273 294 hash = "sha256-pjGw6wQlrVhN65XaIxZueNJqnXThGu00u24rKOLzxS0=" ··· 286 307 [mod."github.com/did-method-plc/go-didplc"] 287 308 version = "v0.2.2" 288 309 hash = "sha256-TF5vdW1U2q5F23ELmbqhdvpNgDQjBs9i/CWJlzqWNKs=" 310 + [mod."github.com/digitalocean/go-libvirt"] 311 + version = "v0.0.0-20220804181439-8648fbde413e" 312 + hash = "sha256-xgRZCefeUNM76M9ht5FCgCf4dnwAdeP/r+FZlJ2gmeY=" 313 + [mod."github.com/digitalocean/go-qemu"] 314 + version = "v0.0.0-20250212194115-ee9b0668d242" 315 + hash = "sha256-AzY84aq9CqGqYwmGkKoi4PX08E/hRQC46mrZwKSMwXE=" 289 316 [mod."github.com/distribution/reference"] 290 317 version = "v0.6.0" 291 318 hash = "sha256-gr4tL+qz4jKyAtl8LINcxMSanztdt+pybj1T+2ulQv4=" ··· 368 395 [mod."github.com/goccy/go-json"] 369 396 version = "v0.10.5" 370 397 hash = "sha256-/EtlGihP0/7oInzMC5E0InZ4b5Ad3s4xOpqotloi3xw=" 398 + [mod."github.com/godbus/dbus/v5"] 399 + version = "v5.1.0" 400 + hash = "sha256-xOCMJpQK3KTmHTPn/CdqI4j0eENCtMmJDgAIoYqYOEY=" 371 401 [mod."github.com/gogo/protobuf"] 372 402 version = "v1.3.2" 373 403 hash = "sha256-pogILFrrk+cAtb0ulqn9+gRZJ7sGnnLLdtqITvxvG6c=" ··· 389 419 [mod."github.com/golang/snappy"] 390 420 version = "v0.0.4" 391 421 hash = "sha256-Umx+5xHAQCN/Gi4HbtMhnDCSPFAXSsjVbXd8n5LhjAA=" 422 + [mod."github.com/google/cel-go"] 423 + version = "v0.28.0" 424 + hash = "sha256-86MSoJX3fovcAWir41Z34nM0HcaDh0yRA+lIiVA1gEM=" 392 425 [mod."github.com/google/go-querystring"] 393 426 version = "v1.1.0" 394 427 hash = "sha256-itsKgKghuX26czU79cK6C2n+lc27jm5Dw1XbIRgwZJY=" ··· 569 602 [mod."github.com/mattn/go-sqlite3"] 570 603 version = "v1.14.34" 571 604 hash = "sha256-PGOevTQb3gNdGu4H2bhvXJlntkMkDt+UhuiK1IpuorQ=" 605 + [mod."github.com/mdlayher/socket"] 606 + version = 
"v0.6.0" 607 + hash = "sha256-Qpwu6m0PqySAGxSkHfME00YLO+lhBc825EGqASDpob8=" 608 + [mod."github.com/mdlayher/vsock"] 609 + version = "v1.3.0" 610 + hash = "sha256-rkN/QRzs5hZ3idZ131yrNEkwfe+cyRkAizumJMQmFLQ=" 572 611 [mod."github.com/microcosm-cc/bluemonday"] 573 612 version = "v1.0.27" 574 613 hash = "sha256-EZSya9FLPQ83CL7N2cZy21fdS35hViTkiMK5f3op8Es=" 614 + [mod."github.com/miekg/dns"] 615 + version = "v1.1.72" 616 + hash = "sha256-i5TwAlGQjz6lF2SNc7MWuTLmxR/7f2Uomco3V0rKaRU=" 575 617 [mod."github.com/minio/sha256-simd"] 576 618 version = "v1.0.1" 577 619 hash = "sha256-4hfGDIQaWq8fvtGzHDhoK9v2IocXnJY7OAL6saMJbmA=" ··· 584 626 [mod."github.com/moby/sys/atomicwriter"] 585 627 version = "v0.1.0" 586 628 hash = "sha256-i46GNrsICnJ0AYkN+ocbVZ2GNTQVEsrVX5WcjKzjtBM=" 629 + [mod."github.com/moby/sys/userns"] 630 + version = "v0.1.0" 631 + hash = "sha256-zwXKyEZIH/FZjSVuSGmtwThDxPutj1pY+N6Ae6oVPuc=" 587 632 [mod."github.com/moby/term"] 588 633 version = "v0.5.2" 589 634 hash = "sha256-/G20jUZKx36ktmPU/nEw/gX7kRTl1Dbu7zvNBYNt4xU=" ··· 641 686 [mod."github.com/opencontainers/image-spec"] 642 687 version = "v1.1.1" 643 688 hash = "sha256-bxBjtl+6846Ed3QHwdssOrNvlHV6b+Dn17zPISSQGP8=" 689 + [mod."github.com/opencontainers/runtime-spec"] 690 + version = "v1.3.0" 691 + hash = "sha256-B2QF7FlUYZDL9eNA0+JD7WasnBryMXNIDbdSGS4MMG4=" 644 692 [mod."github.com/opentracing/opentracing-go"] 645 693 version = "v1.2.1-0.20220228012449-10b1cf09e00b" 646 694 hash = "sha256-77oWcDviIoGWHVAotbgmGRpLGpH5AUy+pM15pl3vRrw=" ··· 693 741 [mod."github.com/sethvargo/go-envconfig"] 694 742 version = "v1.1.0" 695 743 hash = "sha256-WelRHfyZG9hrA4fbQcfBawb2ZXBQNT1ourEYHzQdZ4w=" 744 + [mod."github.com/sirupsen/logrus"] 745 + version = "v1.9.3" 746 + hash = "sha256-EnxsWdEUPYid+aZ9H4/iMTs1XMvCLbXZRDyvj89Ebms=" 696 747 [mod."github.com/spaolacci/murmur3"] 697 748 version = "v1.1.0" 698 749 hash = "sha256-RWD4PPrlAsZZ8Xy356MBxpj+/NZI7w2XOU14Ob7/Y9M=" ··· 793 844 version = "v2.4.3" 794 845 hash = 
"sha256-WqfrOUQFvfuORgl1yyVOcsEXU/vwWQHkcVWx3vCxvaw=" 795 846 [mod."golang.org/x/crypto"] 796 - version = "v0.48.0" 797 - hash = "sha256-uBIGGSGmWWklRxX6XTOqUECzz165UFY9Y99Ka3pLKAw=" 847 + version = "v0.51.0" 848 + hash = "sha256-/R74sc1mcOaOuBeXRQzrXrHAgA5VhNWc6SfQJaxb17U=" 798 849 [mod."golang.org/x/exp"] 799 850 version = "v0.0.0-20260112195511-716be5621a96" 800 851 hash = "sha256-rWqwXzLvvhcI/ZkOQMqCXMKI5FAuHd9YNoKTXujmboA=" 801 852 [mod."golang.org/x/image"] 802 853 version = "v0.31.0" 803 854 hash = "sha256-ZFTlu9+4QToPPLA8C5UcG2eq/lQylq81RoG/WtYo9rg=" 855 + [mod."golang.org/x/mod"] 856 + version = "v0.35.0" 857 + hash = "sha256-ICEQxokHywOFInDPqoP+go9l1tZSz3roknF5SXPtNV4=" 804 858 [mod."golang.org/x/net"] 805 - version = "v0.50.0" 806 - hash = "sha256-A3tvRuVotO4d8S1FX9ri9CpMJacrFJmHebLJ5m9b+Ss=" 859 + version = "v0.55.0" 860 + hash = "sha256-Phi2mSmBGOJcvqPPAit3uqF3UP8SKRI9dHj6yTM3s5s=" 807 861 [mod."golang.org/x/sync"] 808 - version = "v0.19.0" 809 - hash = "sha256-RbRZ+sKZUurOczGhhzOoY/sojTlta3H9XjL4PXX/cno=" 862 + version = "v0.20.0" 863 + hash = "sha256-ybcjhCfK6lroUM0yswUvWooW8MOQZBXyiSqoxG6Uy0Y=" 810 864 [mod."golang.org/x/sys"] 811 - version = "v0.41.0" 812 - hash = "sha256-owjs3/IzAKfFlIz1U1fiHSfl2+bTUhaXTyWEjL5SWHk=" 865 + version = "v0.45.0" 866 + hash = "sha256-hkBoNazrDA67ER6sWhb+EKxx9nJ24+nz3zGy+zT5Hvw=" 813 867 [mod."golang.org/x/text"] 814 - version = "v0.34.0" 815 - hash = "sha256-wGKd1JkeiFROibvo2kkAuQ7JajSIfV4utGaoGbTQhQM=" 868 + version = "v0.37.0" 869 + hash = "sha256-8XDOnlPIybcDRy89fkjG5VqtIt5Ku+LmaqYhgKl7i1E=" 816 870 [mod."golang.org/x/time"] 817 871 version = "v0.12.0" 818 872 hash = "sha256-Cp3oxrCMH2wyxjzr5SHVmyhgaoUuSl56Uy00Q7DYEpw=" 873 + [mod."golang.org/x/tools"] 874 + version = "v0.44.0" 875 + hash = "sha256-xuj5FLtSJsAojLLTLXtPdLAIFNTKoVFbDMuqRXmj2W4=" 819 876 [mod."golang.org/x/xerrors"] 820 877 version = "v0.0.0-20240903120638-7835f813f4da" 821 878 hash = "sha256-bE7CcrnAvryNvM26ieJGXqbAtuLwHaGcmtVMsVnksqo=" 879 + 
[mod."google.golang.org/genproto/googleapis/api"] 880 + version = "v0.0.0-20260209200024-4cfbd4190f57" 881 + hash = "sha256-2C7DZwLpDDdmUhVUcRDaotbtkhQFOQ9a1SsdVC8lOqc=" 882 + [mod."google.golang.org/genproto/googleapis/rpc"] 883 + version = "v0.0.0-20260209200024-4cfbd4190f57" 884 + hash = "sha256-gdgUw1LzgVOrarF1cGBUI9uoaR/d6lur2RwxUDKnOZA=" 822 885 [mod."google.golang.org/protobuf"] 823 886 version = "v1.36.11" 824 887 hash = "sha256-7W+6jntfI/awWL3JP6yQedxqP5S9o3XvPgJ2XxxsIeE="
+309
nix/microvm/base.nix
# Base NixOS configuration for the spindle microVM guest.
#
# It swaps the stock kernel module tree for a pruned copy, sets up static
# guest networking, the `spindle-workflow` user, nix settings, and a single
# persistent ext4 volume that backs /workspace, /var and the writable store
# overlay.
{
  config,
  pkgs,
  lib,
  ...
}: let
  # these are modules / module trees we keep. everything else is pruned.
  # this is a "cheap" way to save on what we ship, we don't have to recompile anything.
  # this saves about 118mb!
  keepTrees = [
    "crypto"
    "lib"
    "arch"
    "drivers/virtio"
    # net: the firewall modprobes across netfilter/ipv4/ipv6; docker adds the
    # bridge/llc/802(stp)/xfrm machinery + NAT targets in netfilter.
    "net/core"
    "net/netfilter"
    "net/ipv4"
    "net/ipv6"
    "net/packet"
    "net/sched"
    "net/vmw_vsock"
    "net/bridge"
    "net/llc"
    "net/802"
    "net/xfrm"
    "fs/configfs"
    "fs/autofs"
    "fs/nls"
    "fs/unicode"
  ];
  # individual modules kept by name regardless of which tree they live in;
  # the pruning script treats "-" and "_" in names as equivalent.
  keepMods = [
    # boot + storage + common workflow filesystems
    "erofs"
    "ext4"
    "jbd2"
    "mbcache"
    "overlay"
    "fuse"
    "loop"
    # "btrfs"
    # "xfs"
    # "f2fs"
    # "vfat"
    # "exfat"
    "squashfs"
    "isofs"
    "dm-mod"
    "zram"
    # virtio devices the runner exposes
    "virtio"
    "virtio_mmio"
    "virtio_pci"
    "virtio_blk"
    "virtio_net"
    "virtio_rng"
    "virtio_console"
    "vsock_loopback"
    "vmw_vsock_virtio_transport"
    "vmw_vsock_virtio_transport_common"
    # container networking (docker default bridge + common custom networks)
    "veth"
    "tun"
    "tap"
    "bridge"
    "br_netfilter"
    "macvlan"
    "ipvlan"
    "vxlan"
    "geneve"
    "dummy"
    "wireguard"
  ];
  # the two lists are handed to the script as newline-separated files
  keepTreesFile = pkgs.writeText "keep-trees" (lib.concatStringsSep "\n" keepTrees);
  keepModsFile = pkgs.writeText "keep-mods" (lib.concatStringsSep "\n" keepMods);
  # argv: <src modules dir> <dst modules dir> <keep-trees file> <keep-mods file>.
  # indexes every file containing ".ko" under <src>/kernel, reads modules.dep,
  # then copies the kept modules plus their transitive modules.dep dependencies
  # into <dst>, preserving relative paths.
  slimModulesScript = pkgs.writeText "slim-modules.py" ''
    import os, shutil, sys

    src, dst, trees_file, mods_file = sys.argv[1:5]
    KEEP_TREES = open(trees_file).read().split()
    KEEP_MODS = open(mods_file).read().split()

    def norm(name):
        return name.replace("-", "_")

    kerneldir = os.path.join(src, "kernel")

    bypath, byname = {}, {}
    for root, _, files in os.walk(kerneldir):
        for f in files:
            if ".ko" not in f:
                continue
            ap = os.path.join(root, f)
            rel = os.path.relpath(ap, src)
            bypath[rel] = ap
            byname[norm(f.split(".ko")[0])] = rel

    deps = {}
    with open(os.path.join(src, "modules.dep")) as fh:
        for line in fh:
            if ":" in line:
                mod, rest = line.split(":", 1)
                deps[mod.strip()] = rest.split()

    keep = set()
    def add(rel):
        if rel in keep or rel not in bypath:
            return
        keep.add(rel)
        for dep in deps.get(rel, []):
            add(dep)

    for tree in KEEP_TREES:
        for root, _, files in os.walk(os.path.join(kerneldir, tree)):
            for f in files:
                if ".ko" in f:
                    add(os.path.relpath(os.path.join(root, f), src))
    for mod in KEEP_MODS:
        rel = byname.get(norm(mod))
        if rel:
            add(rel)

    for rel in keep:
        target = os.path.join(dst, rel)
        os.makedirs(os.path.dirname(target), exist_ok=True)
        shutil.copy2(bypath[rel], target)
    print(f"kept {len(keep)} of {len(bypath)} modules")
  '';
  # pruned copy of the kernel's "modules" output: copies modules.builtin* and
  # modules.order as-is, runs the script above, then rebuilds modules.dep for
  # the reduced set with depmod.
  slimKernelModules =
    pkgs.runCommand "${config.boot.kernelPackages.kernel.name}-modules-microvm"
    {nativeBuildInputs = [pkgs.python3 pkgs.kmod];}
    ''
      src=${lib.getOutput "modules" config.boot.kernelPackages.kernel}/lib/modules
      ver=$(ls "$src")
      mkdir -p "$out/lib/modules/$ver"
      for f in "$src/$ver"/modules.builtin* "$src/$ver"/modules.order; do
        [ -e "$f" ] && cp "$f" "$out/lib/modules/$ver/"
      done
      python3 ${slimModulesScript} "$src/$ver" "$out/lib/modules/$ver" ${keepTreesFile} ${keepModsFile}
      # regen modules.dep
      depmod -b "$out" "$ver"
    '';
in {
  system.stateVersion = "26.05";

  # actually use our slimmed down modules set
  system.modulesTree = lib.mkForce ([slimKernelModules] ++ config.boot.extraModulePackages);

  # the initrd loads exactly the modules listed here and nothing else
  boot.initrd.includeDefaultModules = lib.mkForce false;
  boot.initrd.availableKernelModules = lib.mkForce [];
  boot.initrd.kernelModules = lib.mkForce [
    "virtio_pci"
    "virtio_mmio"
    "virtio_blk"
    "virtio_console"
    "erofs"
    "ext4"
    "overlay"
  ];
  boot.kernelModules = ["loop"];

  # some zram to help situations where burst memory usage causes OOM
  zramSwap = {
    enable = true;
    algorithm = "zstd";
    memoryPercent = 50;
  };

  programs.nano.enable = false;
  # we are on a microvm we don't need the hardware map
  environment.etc."udev/hwdb.bin".enable = lib.mkForce false;

  # static guest addressing; DNS points at the guest's own loopback
  networking.hostName = "spindle-microvm";
  networking.useDHCP = false;
  systemd.network.networks."40-eth0" = {
    matchConfig.Name = "eth0";
    address = ["10.0.3.15/24"];
    gateway = ["10.0.3.2"];
    dns = ["127.0.0.1"];
  };
  networking.nameservers = ["127.0.0.1"];

  # this is disabled by microvm optimizations but we do need it
  system.switch.enable = lib.mkForce true;

  # don't install docs or any xdg things, not necessary
  documentation.enable = false;
  xdg.mime.enable = false;
  xdg.icons.enable = false;
  xdg.sounds.enable = false;

  # unprivileged user workflows run as; uid/gid are pinned to 970
  users.groups.spindle-workflow = {
    gid = 970;
  };
  users.users.spindle-workflow = {
    isSystemUser = true;
    uid = 970;
    group = "spindle-workflow";
    home = "/workspace";
    createHome = false;
  };
  # docker group membership is only added when docker is enabled
  users.users.spindle-workflow.extraGroups = lib.mkIf config.virtualisation.docker.enable [
    "docker"
  ];
  virtualisation.docker.listenOptions = [
    "/run/docker.sock"
    "/var/run/docker.sock"
  ];

  nix = {
    settings = {
      experimental-features = [
        "nix-command"
        "flakes"
      ];
      trusted-users = ["root"];
      allowed-users = ["spindle-workflow"];
    };
    # pin the `nixpkgs` registry entry to the nixpkgs source this system was built from
    registry.nixpkgs.to = {
      type = "path";
      path = pkgs.path;
    };
    # /run/spindle/nix.conf is optional ("!include") and read at runtime
    extraOptions = ''
      extra-experimental-features = nix-command flakes
      !include /run/spindle/nix.conf
    '';
    nixPath = ["nixpkgs=${config.nix.registry.nixpkgs.to.path}"];
  };

  systemd.tmpfiles.rules = [
    "d /run/spindle 0755 root root -"
    "d /workspace 0755 spindle-workflow spindle-workflow -"
    "d /workspace/repo 0755 spindle-workflow spindle-workflow -"
  ];

  # add any common packages / services here
  environment.systemPackages = [pkgs.gitMinimal];
  # disable default nixos packages ([perl rsync strace])
  environment.defaultPackages = [];
  # this removes nixos-rebuild-ng and nixos-generate-config, which lets us
  # remove python3 closure (~107MB)
  system.disableInstallerTools = true;

  # a single volume that will back /workspace, /var, and the nix store
  microvm.storeOnDisk = true;
  microvm.storeDiskType = "erofs";
  # lz4hc, not zstd: the stock nixpkgs kernel builds erofs without
  # CONFIG_EROFS_FS_ZIP_ZSTD, so a zstd image fails to mount at boot ("algorithm
  # 3 isn't enabled on this kernel"); only lz4 is guaranteed. -Efragments and
  # -Ededupe are omitted because microvm.nix falls back to single-threaded
  # erofs-utils when either is present, which makes image builds really slow.
  # for now, we take the compression hit, which isn't too much anyway.
  # todo(dawn): the remaining big save needs a custom guest kernel (we'd want a
  # binary cache first so downstream users don't rebuild it every time): enable
  # EROFS_FS_ZIP_ZSTD for a better-compressing store-disk, build the essentials
  # (virtio/erofs/ext4/overlay/netfilter) in as =y, and strip the kernel image
  # itself. the modules tree is already pruned without a recompile, see
  # slimKernelModules above.
  microvm.storeDiskErofsFlags = [
    "-zlz4hc"
    "-Eztailpacking"
    "-C131072" # bigger compression window lets lz4hc compress better (~47mb)
  ];
  microvm.writableStoreOverlay = "/persist/rw-store";
  microvm.volumes = [
    {
      image = "persist.img";
      mountPoint = "/persist";
      size = 1024 * 16;
      fsType = "ext4";
    }
  ];

  # /persist must be mounted before the writable store overlay activates
  fileSystems."/persist".neededForBoot = true;

  fileSystems."/workspace" = {
    device = "/persist/workspace";
    fsType = "none";
    options = ["bind"];
    depends = ["/persist"];
  };
  # bind mounting /var is important since docker etc. can't use overlayfs
  # (overlayfs on overlayfs does not work)
  fileSystems."/var" = {
    device = "/persist/var";
    fsType = "none";
    options = ["bind"];
    depends = ["/persist"];
  };

  # create bind sources before local-fs.target, which means we have to do this
  # at initrd time
  boot.initrd.systemd.enable = true;
  boot.initrd.systemd.tmpfiles.settings."00-persist-layout" = {
    "/sysroot/persist/rw-store".d = {
      mode = "0755";
    };
    # NOTE(review): user/group are given by name here; confirm they resolve
    # inside the initrd, otherwise the numeric id 970 declared above may be
    # needed.
    "/sysroot/persist/workspace".d = {
      mode = "0755";
      user = "spindle-workflow";
      group = "spindle-workflow";
    };
    "/sysroot/persist/var".d = {
      mode = "0755";
    };
  };
}
+25
nix/microvm/qemu.nix
# QEMU runner settings for the spindle microVM: a 2 vCPU "microvm" machine
# with one user-mode network interface, a vsock device and a control socket.
_: let
  # the single user-mode NIC exposed to the guest
  userNet = {
    type = "user";
    id = "net0";
    mac = "02:00:00:00:10:01";
  };
in {
  microvm.hypervisor = "qemu";
  microvm.qemu.machine = "microvm";
  microvm.optimize.enable = true;

  microvm.vcpu = 2;
  # don't set to 2048, https://github.com/microvm-nix/microvm.nix/issues/171
  microvm.mem = 2049;

  microvm.interfaces = [userNet];
  microvm.vsock.cid = 3;
  microvm.socket = "control.socket";

  boot.kernelModules = ["vsock_loopback"];
}
+48
nix/microvm/spindle-vm.nix
# Builds the NixOS module list for a spindle guest.
#
# Takes the flake's `self` and the `microvm` input, then a runner module
# (e.g. ./qemu.nix). Besides importing the modules, it places the same
# sources under /etc/spindle/nixos together with a default.nix that
# evaluates them as a NixOS system from inside the guest.
{
  self,
  microvm,
}: runnerModule: let
  # exposes the module sources in /etc and enables the shuttle agent
  guestSources = {pkgs, ...}: let
    system = pkgs.stdenv.hostPlatform.system;
  in {
    services.tangled.shuttle.enable = true;

    environment.etc."spindle/nixos/base.nix".source = ./base.nix;
    environment.etc."spindle/nixos/runner.nix".source = runnerModule;
    environment.etc."spindle/nixos/user-config.nix".source = ./user-config.nix;
    environment.etc."spindle/nixos/microvm".source = microvm;

    # wrapper around the shuttle module that defaults the package to the one
    # built by this flake
    environment.etc."spindle/nixos/shuttle.nix".text = ''
      { config, lib, pkgs, ... }:
      {
        imports = [${../modules/shuttle.nix}];
        services.tangled.shuttle.package = lib.mkDefault ${self.packages.${system}.shuttle};
      }
    '';

    # pkgs.path is fine here because we pass the nixpkgs source into the vm in ./base.nix
    environment.etc."spindle/nixos/default.nix".text = ''
      let
        nixpkgs = ${pkgs.path};
        nixos = import (nixpkgs + "/nixos") {
          system = "${system}";
          configuration = {
            imports = [
              /etc/spindle/nixos/microvm/nixos-modules/microvm/default.nix
              /etc/spindle/nixos/base.nix
              /etc/spindle/nixos/runner.nix
              /etc/spindle/nixos/shuttle.nix
              /etc/spindle/nixos/user-config.nix
            ];
            services.tangled.shuttle.enable = true;
          };
        };
      in
        nixos.system
    '';
  };
in {
  imports = [
    microvm.nixosModules.microvm
    ./base.nix
    runnerModule
    self.nixosModules.shuttle
    guestSources
  ];
}
+110
nix/microvm/user-config.nix
# NixOS module that turns a workflow's user configuration into module
# settings.
#
# The configuration comes from the `userConfig` module argument when present,
# otherwise from /run/spindle/user-config/config.json if that file exists,
# otherwise it is empty. Recognised keys: `registry`, `dependencies`,
# `services`, `virtualisation`.
{
  pkgs,
  lib,
  options,
  ...
} @ args: let
  configPath = /run/spindle/user-config/config.json;
  userConfig =
    args.userConfig
    or (
      if builtins.pathExists configPath
      then lib.importJSON configPath
      else {}
    );

  registry = userConfig.registry or {};

  # registry targets may be structured attrs or flake ref strings; strings are
  # parsed by nix itself in getFlake. flakeRefToString rejects unforced attr
  # values, hence the toJSON round-trip
  toRefString = target:
    if builtins.isAttrs target
    then builtins.flakeRefToString (builtins.fromJSON (builtins.toJSON target))
    else target;

  # user registry entries shadow the system registry (which pins nixpkgs)
  getFlake = ref: builtins.getFlake (toRefString (registry.${ref} or ref));

  # "flakeref#attr" or a bare attr looked up in nixpkgs. nixpkgs refs use the
  # already-evaluated pkgs directly instead of re-evaluating via getFlake,
  # unless the user remapped nixpkgs in their registry.
  #
  # attr may be a dotted attribute path ("python3Packages.requests"). an
  # attribute whose name matches the text exactly always wins, so refs that
  # resolved before still resolve to the same package; the dotted path is only
  # tried once every exact-name lookup has missed.
  resolvePackage = ref: let
    parts = lib.splitString "#" ref;
    hasAttr = lib.length parts > 1;
    flakeRef =
      if hasAttr
      then lib.head parts
      else "nixpkgs";
    pkgName =
      if hasAttr
      then lib.elemAt parts 1
      else ref;
    attrPath = lib.splitString "." pkgName;
    system = pkgs.stdenv.hostPlatform.system;
    flake = getFlake flakeRef;
    # package sets of the flake for this system; empty when the output is absent
    legacy = flake.legacyPackages.${system} or {};
    packages = flake.packages.${system} or {};
    notFound = throw "Package ${pkgName} not found in ${flakeRef}";
  in
    if flakeRef == "nixpkgs" && !(registry ? nixpkgs)
    then pkgs.${pkgName} or (lib.attrByPath attrPath notFound pkgs)
    else
      legacy.${pkgName}
      or packages.${pkgName}
      or (lib.attrByPath attrPath (lib.attrByPath attrPath notFound packages) legacy);

  # strings are resolved as package references only where the option type
  # actually expects packages; everything else passes through untouched
  resolveForType = type: v:
    if type.name == "package" && builtins.isString v
    then resolvePackage v
    # path-typed options (e.g. services.udev.packages) accept derivations via
    # coercion; "#" disambiguates flake refs from actual paths, which are
    # always absolute
    else if type.name == "path" && builtins.isString v && lib.hasInfix "#" v && !lib.hasPrefix "/" v
    then resolvePackage v
    else if type.name == "nullOr" && v != null
    then resolveForType type.nestedTypes.elemType v
    else if type.name == "listOf" && builtins.isList v
    then map (resolveForType type.nestedTypes.elemType) v
    else if (type.name == "attrsOf" || type.name == "lazyAttrsOf") && builtins.isAttrs v
    then builtins.mapAttrs (_: resolveForType type.nestedTypes.elemType) v
    else if type.name == "submodule" && builtins.isAttrs v
    then resolveOptions (type.getSubOptions []) v
    else v;

  # resolve every attribute of a value against the matching entry of an
  # option tree; names without a matching entry pass through unchanged
  resolveOptions = opts: builtins.mapAttrs (name: resolveValue (opts.${name} or null));

  # opt is an option, a nested option tree, or null when nothing is declared
  resolveValue = opt: v:
    if !builtins.isAttrs opt
    then v
    else if lib.isOption opt
    then resolveForType opt.type v
    else if builtins.isAttrs v
    then resolveOptions opt v
    else v;

  # `foo = true` is shorthand for `foo.enable = true`, but only when an
  # enable option actually exists under foo
  hasEnableOption = opt:
    builtins.isAttrs opt
    && (
      if lib.isOption opt
      then (opt.type.getSubOptions opt.loc) ? enable
      else opt ? enable && lib.isOption opt.enable
    );

  # top-level entry of `services` / `virtualisation`: expand the boolean
  # shorthand, otherwise resolve package references inside the value
  normalize = opts: name: v: let
    opt = opts.${name} or null;
  in
    if builtins.isBool v && hasEnableOption opt
    then {enable = v;}
    else resolveValue opt v;
in {
  # every user registry entry is fetched and pinned to its store path
  nix.registry = builtins.mapAttrs (name: _:
    lib.mkForce {
      to = {
        type = "path";
        path = (getFlake name).outPath;
      };
    })
  registry;
  environment.systemPackages = map resolvePackage (userConfig.dependencies or []);
  services = builtins.mapAttrs (normalize (options.services or {})) (userConfig.services or {});
  virtualisation = builtins.mapAttrs (normalize (options.virtualisation or {})) (userConfig.virtualisation or {});
}
+57
nix/modules/shuttle.nix
# NixOS module for the shuttle guest agent: declares
# `services.tangled.shuttle.{enable,package}`, runs the agent as a systemd
# service and registers a nix post-build hook that passes built store paths
# to `shuttle enqueue-built-paths`.
{
  config,
  lib,
  pkgs,
  ...
}: let
  inherit (lib) mkEnableOption mkIf mkOption types;

  cfg = config.services.tangled.shuttle;
  shuttleBin = "${cfg.package}/bin/shuttle";

  hookName = "spindle-post-build-hook";
  # exits early when nix provides no OUT_PATHS, otherwise forwards them
  postBuildHook = pkgs.writeShellApplication {
    name = hookName;
    text = ''
      set -f

      if [ -z "''${OUT_PATHS:-}" ]; then
        exit 0
      fi

      # OUT_PATHS is intentionally split into individual store paths for the agent
      # shellcheck disable=SC2086
      exec ${shuttleBin} enqueue-built-paths $OUT_PATHS
    '';
  };
in {
  options.services.tangled.shuttle.enable = mkEnableOption "the shuttle guest agent";

  options.services.tangled.shuttle.package = mkOption {
    type = types.package;
    description = "package providing the shuttle executable.";
  };

  config = mkIf cfg.enable {
    nix.settings.post-build-hook = "${postBuildHook}/bin/${hookName}";

    systemd.services.shuttle = {
      description = "shuttle guest agent";
      wantedBy = ["multi-user.target"];

      # ordered after local filesystems and the network, before the nix daemon
      wants = ["network-online.target"];
      after = ["local-fs.target" "network-online.target"];
      before = ["nix-daemon.service"];
      restartIfChanged = false;

      environment.NIX_PATH = lib.concatStringsSep ":" config.nix.nixPath;

      serviceConfig.Type = "simple";
      serviceConfig.ExecStart = shuttleBin;
      serviceConfig.Restart = "always";
      serviceConfig.RestartSec = "1s";
    };
  };
}
+222 -54
nix/modules/spindle.nix
··· 1 1 { 2 2 config, 3 3 lib, 4 + pkgs, 4 5 ... 5 6 }: let 6 7 cfg = config.services.tangled.spindle; ··· 75 76 description = "Maximum number of jobs queue up"; 76 77 }; 77 78 78 - maxConcurrentWorkflows = mkOption { 79 - type = types.int; 80 - default = 8; 81 - description = "Maximum number of workflow containers running simultaneously (controls total memory usage)"; 82 - }; 83 - 84 79 secrets = { 85 80 provider = mkOption { 86 81 type = types.str; ··· 136 131 }; 137 132 138 133 pipelines = { 139 - nixery = mkOption { 134 + logBucket = mkOption { 140 135 type = types.str; 141 - default = "nixery.tangled.sh"; # note: this is *not* on tangled.org yet 142 - description = "Nixery instance to use"; 136 + default = "tangled-logs"; 137 + description = "S3 bucket for workflow logs"; 143 138 }; 144 - 145 139 workflowTimeout = mkOption { 146 140 type = types.str; 147 141 default = "5m"; 148 - description = "Timeout for each step of a pipeline"; 142 + description = "Timeout for each workflow step"; 149 143 }; 150 144 151 - maxJobMemoryMb = mkOption { 152 - type = types.int; 153 - default = 6144; 154 - description = "Memory limit per workflow container in MiB (default 6 GiB)"; 145 + nixery = { 146 + nixery = mkOption { 147 + type = types.str; 148 + default = "nixery.tangled.sh"; # note: this is *not* on tangled.org yet 149 + description = "Nixery instance to use"; 150 + }; 151 + 152 + maxJobMemoryMb = mkOption { 153 + type = types.int; 154 + default = 6144; 155 + description = "Memory limit per nixery workflow container in MiB (default 6 GiB)"; 156 + }; 157 + maxConcurrentWorkflows = mkOption { 158 + type = types.int; 159 + default = 8; 160 + description = "Maximum number of nixery workflows running simultaneously. 
Zero disables this limit."; 161 + }; 155 162 }; 156 163 157 - logBucket = mkOption { 164 + microvm = { 165 + enableKVM = mkOption { 166 + type = types.bool; 167 + default = true; 168 + description = "Enable KVM hardware acceleration"; 169 + }; 170 + 171 + imageDir = mkOption { 172 + type = types.str; 173 + default = "/var/lib/spindle/images"; 174 + description = "Directory containing microVM image spec JSONs or image spec directories"; 175 + }; 176 + overlayDir = mkOption { 177 + type = types.str; 178 + default = "/tmp"; 179 + description = "Directory to store microVM temporary overlay files"; 180 + }; 181 + defaultImage = mkOption { 182 + type = types.str; 183 + default = "nixos"; 184 + description = "Default microVM image spec to use if none is specified in workflow"; 185 + }; 186 + agentPort = mkOption { 187 + type = types.port; 188 + default = 10240; 189 + description = "Host vsock port the microVM agent connects back to"; 190 + }; 191 + 192 + limits = { 193 + total = { 194 + memoryMiB = mkOption { 195 + type = types.int; 196 + default = 0; 197 + description = "Maximum declared guest memory in MiB allowed across all running microVM workflows. Zero disables this limit."; 198 + }; 199 + vcpus = mkOption { 200 + type = types.int; 201 + default = 0; 202 + description = "Maximum declared vCPUs allowed across all running microVM workflows. Zero disables this limit."; 203 + }; 204 + diskMiB = mkOption { 205 + type = types.int; 206 + default = 0; 207 + description = "Maximum declared disk in MiB allowed across all running microVM workflows. Zero disables this limit."; 208 + }; 209 + }; 210 + 211 + workflow = { 212 + memoryMiB = mkOption { 213 + type = types.int; 214 + default = 0; 215 + description = "Maximum declared guest memory in MiB allowed for a single microVM workflow. Zero disables this limit."; 216 + }; 217 + vcpus = mkOption { 218 + type = types.int; 219 + default = 0; 220 + description = "Maximum declared vCPUs allowed for a single microVM workflow. 
Zero disables this limit."; 221 + }; 222 + diskMiB = mkOption { 223 + type = types.int; 224 + default = 0; 225 + description = "Maximum declared disk in MiB allowed for a single microVM workflow. Zero disables this limit."; 226 + }; 227 + }; 228 + }; 229 + 230 + cgroup = { 231 + enable = mkOption { 232 + type = types.bool; 233 + default = false; 234 + description = "Enable cgroup v2 containment for microVM processes."; 235 + }; 236 + parent = mkOption { 237 + type = types.str; 238 + default = "self"; 239 + description = "Parent cgroup for microVM workflow cgroups. Use 'self' to resolve the spindle service cgroup."; 240 + }; 241 + pidsMax = mkOption { 242 + type = types.int; 243 + default = 4096; 244 + description = "Maximum number of processes allowed in each microVM workflow cgroup."; 245 + }; 246 + swapMaxMiB = mkOption { 247 + type = types.int; 248 + default = 0; 249 + description = "Maximum swap in MiB allowed in each microVM workflow cgroup. Zero disables swap."; 250 + }; 251 + supervisorMinMiB = mkOption { 252 + type = types.int; 253 + default = 512; 254 + description = '' 255 + Amount of memory in MiB that will be protected by the cgroup for the spindle 256 + (allowing it to not get OOMed first.) 
257 + ''; 258 + }; 259 + }; 260 + }; 261 + }; 262 + 263 + cache = { 264 + readUrls = mkOption { 265 + type = types.listOf types.str; 266 + default = []; 267 + example = ["http://ncps.internal:8501"]; 268 + description = "Nix binary cache URLs the Spindle guest should read from."; 269 + }; 270 + 271 + trustedPublicKeys = mkOption { 272 + type = types.listOf types.str; 273 + default = []; 274 + example = ["ncps.internal-1:AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="]; 275 + description = "Public keys trusted for the configured Nix binary caches."; 276 + }; 277 + 278 + uploadUrl = mkOption { 158 279 type = types.str; 159 - default = "tangled-logs"; 160 - description = "S3 bucket for workflow logs"; 280 + default = ""; 281 + example = "http://ncps.internal:8501/upload"; 282 + description = "Optional cache upload URL used by live cache import paths."; 161 283 }; 162 284 }; 163 285 ··· 177 299 }; 178 300 }; 179 301 180 - config = mkIf cfg.enable { 181 - virtualisation.docker.enable = true; 302 + config = let 303 + deps = [ 304 + pkgs.qemu 305 + pkgs.e2fsprogs 306 + pkgs.slirp4netns 307 + pkgs.iproute2 308 + pkgs.util-linux 309 + ]; 310 + in 311 + mkIf cfg.enable { 312 + environment.systemPackages = [ 313 + (pkgs.writeShellScriptBin "spindle" '' 314 + export PATH="${lib.makeBinPath deps}:$PATH" 315 + ${lib.optionalString (cfg.environmentFile != null) "set -a; source ${cfg.environmentFile}; set +a"} 316 + ${lib.concatMapStringsSep "\n" ( 317 + e: "export ${e}" 318 + ) 319 + config.systemd.services.spindle.serviceConfig.Environment} 320 + exec ${cfg.package}/bin/spindle "$@" 321 + '') 322 + ]; 182 323 183 - systemd.services.spindle = { 184 - description = "spindle service"; 185 - after = ["network.target" "docker.service"]; 186 - wantedBy = ["multi-user.target"]; 187 - serviceConfig = { 188 - LogsDirectory = "spindle"; 189 - StateDirectory = "spindle"; 190 - EnvironmentFile = mkIf (cfg.environmentFile != null) cfg.environmentFile; 324 + virtualisation.docker.enable = true; 
191 325 192 - Environment = [ 193 - "SPINDLE_SERVER_LISTEN_ADDR=${cfg.server.listenAddr}" 194 - "SPINDLE_SERVER_DB_PATH=${cfg.server.dbPath}" 195 - "SPINDLE_SERVER_HOSTNAME=${cfg.server.hostname}" 196 - "SPINDLE_SERVER_PLC_URL=${cfg.server.plcUrl}" 197 - "SPINDLE_SERVER_JETSTREAM_ENDPOINT=${cfg.server.jetstreamEndpoint}" 198 - "SPINDLE_SERVER_DEV=${lib.boolToString cfg.server.dev}" 199 - "SPINDLE_SERVER_OWNER=${cfg.server.owner}" 200 - "SPINDLE_SERVER_MAX_JOB_COUNT=${toString cfg.server.maxJobCount}" 201 - "SPINDLE_SERVER_QUEUE_SIZE=${toString cfg.server.queueSize}" 202 - "SPINDLE_SERVER_MAX_CONCURRENT_WORKFLOWS=${toString cfg.server.maxConcurrentWorkflows}" 203 - "SPINDLE_SERVER_SECRETS_PROVIDER=${cfg.server.secrets.provider}" 204 - "SPINDLE_SERVER_SECRETS_OPENBAO_PROXY_ADDR=${cfg.server.secrets.openbao.proxyAddr}" 205 - "SPINDLE_SERVER_SECRETS_OPENBAO_MOUNT=${cfg.server.secrets.openbao.mount}" 206 - "SPINDLE_SERVER_TAP_EMBED=${lib.boolToString cfg.server.tap.embed}" 207 - "SPINDLE_SERVER_TAP_URL=${cfg.server.tap.url}" 208 - "SPINDLE_SERVER_TAP_BIND=${cfg.server.tap.bind}" 209 - "SPINDLE_SERVER_TAP_DB_PATH=${cfg.server.tap.dbPath}" 210 - "SPINDLE_SERVER_TAP_RELAY_URL=${cfg.server.tap.relayUrl}" 211 - "SPINDLE_NIXERY_PIPELINES_NIXERY=${cfg.pipelines.nixery}" 212 - "SPINDLE_NIXERY_PIPELINES_WORKFLOW_TIMEOUT=${cfg.pipelines.workflowTimeout}" 213 - "SPINDLE_NIXERY_PIPELINES_MAX_JOB_MEMORY_MB=${toString cfg.pipelines.maxJobMemoryMb}" 214 - "SPINDLE_S3_LOG_BUCKET=${cfg.pipelines.logBucket}" 326 + systemd.services.spindle = { 327 + description = "spindle service"; 328 + after = [ 329 + "network.target" 330 + "docker.service" 215 331 ]; 216 - ExecStart = "${cfg.package}/bin/spindle"; 217 - Restart = "always"; 332 + wantedBy = ["multi-user.target"]; 333 + path = deps; 334 + serviceConfig = { 335 + LogsDirectory = "spindle"; 336 + StateDirectory = "spindle"; 337 + Delegate = cfg.pipelines.microvm.cgroup.enable; 338 + EnvironmentFile = mkIf (cfg.environmentFile != null) 
cfg.environmentFile; 339 + 340 + Environment = [ 341 + "SPINDLE_SERVER_LISTEN_ADDR=${cfg.server.listenAddr}" 342 + "SPINDLE_SERVER_DB_PATH=${cfg.server.dbPath}" 343 + "SPINDLE_SERVER_HOSTNAME=${cfg.server.hostname}" 344 + "SPINDLE_SERVER_PLC_URL=${cfg.server.plcUrl}" 345 + "SPINDLE_SERVER_JETSTREAM_ENDPOINT=${cfg.server.jetstreamEndpoint}" 346 + "SPINDLE_SERVER_DEV=${lib.boolToString cfg.server.dev}" 347 + "SPINDLE_SERVER_OWNER=${cfg.server.owner}" 348 + "SPINDLE_SERVER_MAX_JOB_COUNT=${toString cfg.server.maxJobCount}" 349 + "SPINDLE_SERVER_QUEUE_SIZE=${toString cfg.server.queueSize}" 350 + "SPINDLE_SERVER_SECRETS_PROVIDER=${cfg.server.secrets.provider}" 351 + "SPINDLE_SERVER_SECRETS_OPENBAO_PROXY_ADDR=${cfg.server.secrets.openbao.proxyAddr}" 352 + "SPINDLE_SERVER_SECRETS_OPENBAO_MOUNT=${cfg.server.secrets.openbao.mount}" 353 + "SPINDLE_SERVER_TAP_EMBED=${lib.boolToString cfg.server.tap.embed}" 354 + "SPINDLE_SERVER_TAP_URL=${cfg.server.tap.url}" 355 + "SPINDLE_SERVER_TAP_BIND=${cfg.server.tap.bind}" 356 + "SPINDLE_SERVER_TAP_DB_PATH=${cfg.server.tap.dbPath}" 357 + "SPINDLE_SERVER_TAP_RELAY_URL=${cfg.server.tap.relayUrl}" 358 + "SPINDLE_NIXERY_PIPELINES_NIXERY=${cfg.pipelines.nixery.nixery}" 359 + "SPINDLE_NIXERY_PIPELINES_WORKFLOW_TIMEOUT=${cfg.pipelines.workflowTimeout}" 360 + "SPINDLE_NIXERY_PIPELINES_MAX_JOB_MEMORY_MB=${toString cfg.pipelines.nixery.maxJobMemoryMb}" 361 + "SPINDLE_NIXERY_PIPELINES_MAX_CONCURRENT_WORKFLOWS=${toString cfg.pipelines.nixery.maxConcurrentWorkflows}" 362 + "SPINDLE_MICROVM_PIPELINES_IMAGE_DIR=${cfg.pipelines.microvm.imageDir}" 363 + "SPINDLE_MICROVM_PIPELINES_OVERLAY_DIR=${cfg.pipelines.microvm.overlayDir}" 364 + "SPINDLE_MICROVM_PIPELINES_DEFAULT_IMAGE=${cfg.pipelines.microvm.defaultImage}" 365 + "SPINDLE_MICROVM_PIPELINES_AGENT_PORT=${toString cfg.pipelines.microvm.agentPort}" 366 + "SPINDLE_MICROVM_PIPELINES_ENABLE_KVM=${lib.boolToString cfg.pipelines.microvm.enableKVM}" 367 + 
"SPINDLE_MICROVM_PIPELINES_WORKFLOW_TIMEOUT=${cfg.pipelines.workflowTimeout}" 368 + "SPINDLE_MICROVM_PIPELINES_MAX_TOTAL_MEMORY_MIB=${toString cfg.pipelines.microvm.limits.total.memoryMiB}" 369 + "SPINDLE_MICROVM_PIPELINES_MAX_TOTAL_VCPUS=${toString cfg.pipelines.microvm.limits.total.vcpus}" 370 + "SPINDLE_MICROVM_PIPELINES_MAX_TOTAL_DISK_MIB=${toString cfg.pipelines.microvm.limits.total.diskMiB}" 371 + "SPINDLE_MICROVM_PIPELINES_MAX_WORKFLOW_MEMORY_MIB=${toString cfg.pipelines.microvm.limits.workflow.memoryMiB}" 372 + "SPINDLE_MICROVM_PIPELINES_MAX_WORKFLOW_VCPUS=${toString cfg.pipelines.microvm.limits.workflow.vcpus}" 373 + "SPINDLE_MICROVM_PIPELINES_MAX_WORKFLOW_DISK_MIB=${toString cfg.pipelines.microvm.limits.workflow.diskMiB}" 374 + "SPINDLE_MICROVM_PIPELINES_ENABLE_CGROUPS=${lib.boolToString cfg.pipelines.microvm.cgroup.enable}" 375 + "SPINDLE_MICROVM_PIPELINES_CGROUP_PARENT=${cfg.pipelines.microvm.cgroup.parent}" 376 + "SPINDLE_MICROVM_PIPELINES_CGROUP_PIDS_MAX=${toString cfg.pipelines.microvm.cgroup.pidsMax}" 377 + "SPINDLE_MICROVM_PIPELINES_CGROUP_SWAP_MAX_MIB=${toString cfg.pipelines.microvm.cgroup.swapMaxMiB}" 378 + "SPINDLE_MICROVM_PIPELINES_CGROUP_SUPERVISOR_MEMORY_MIN_MIB=${toString cfg.pipelines.microvm.cgroup.supervisorMinMiB}" 379 + "SPINDLE_NIX_CACHE_READ_URLS=${concatStringsSep "," cfg.cache.readUrls}" 380 + "SPINDLE_NIX_CACHE_TRUSTED_PUBLIC_KEYS=${concatStringsSep "," cfg.cache.trustedPublicKeys}" 381 + "SPINDLE_NIX_CACHE_UPLOAD_URL=${cfg.cache.uploadUrl}" 382 + "SPINDLE_S3_LOG_BUCKET=${cfg.pipelines.logBucket}" 383 + ]; 384 + ExecStart = "${cfg.package}/bin/spindle"; 385 + Restart = "always"; 386 + }; 218 387 }; 219 388 }; 220 - }; 221 389 }
+23
nix/pkgs/shuttle.nix
# Builds the `shuttle` binary from the `shuttle` package of the cargo
# workspace in `src`; protobuf is available at build time.
{
  rustPlatform,
  src,
  protobuf,
  ...
}: let
  crate = "shuttle";
  # build and test only this one binary of the workspace
  onlyShuttle = ["--bin" crate "--package" crate];
in
  rustPlatform.buildRustPackage {
    pname = crate;
    version = "0.1.0";

    inherit src;
    cargoLock = {lockFile = "${src}/Cargo.lock";};

    nativeBuildInputs = [protobuf];

    cargoBuildFlags = onlyShuttle;
    cargoTestFlags = onlyShuttle;
  }
+222
nix/pkgs/spindle-alpine-image.nix
# Builds an Alpine-based spindle microVM image directory containing:
#   store-disk  squashfs root filesystem (Alpine rootfs + guest tools + shuttle)
#   kernel      guest kernel
#   initrd      guest initramfs
#   spec.json   image spec consumed by the runner
#
# `rootfs` is a gzipped tarball, `modloop` a squashfs holding the kernel
# modules, and `repositories` a list of apk repository lines.
{
  pkgsStatic,
  runCommand,
  writeText,
  squashfsTools,
  shuttle,
  binutils,
  publicsuffix-list,
  rootfs,
  kernel,
  initramfs,
  modloop,
  repositories,
  arch ? "x86_64",
}: let
  # statically linked guest tools; only their bin/ and libexec/ contents end
  # up in the image (see the install loop below)
  nix = pkgsStatic.nixStatic;
  bash = pkgsStatic.bashNonInteractive;
  git =
    (pkgsStatic.gitMinimal.override {
      curl = pkgsStatic.curlMinimal;
      pythonSupport = false;
      withManual = false;
      nlsSupport = false;
    }).overrideAttrs (old: {
      doCheck = false;
      doInstallCheck = false;
      configureFlags = (old.configureFlags or []) ++ ["ac_cv_lib_curl_curl_global_init=yes"];
    });
  guestTools = [nix bash git];

  # run by busybox at sysinit
  setupScript = writeText "spindle-setup" ''
    #!/bin/sh

    mountpoint -q /proc || mount -t proc proc /proc
    mountpoint -q /sys || mount -t sysfs sys /sys
    mountpoint -q /dev || mount -t devtmpfs dev /dev
    mountpoint -q /dev/pts || {
      mkdir -p /dev/pts
      mount -t devpts devpts /dev/pts
    }
    mountpoint -q /dev/shm || {
      mkdir -p /dev/shm
      mount -t tmpfs -o mode=1777 shm /dev/shm
    }
    mountpoint -q /run || mount -t tmpfs -o mode=0755 run /run
    mountpoint -q /tmp || mount -t tmpfs -o mode=1777 tmp /tmp

    # the initramfs mdev leaves these 0660, which breaks non-root workflows
    chmod 666 /dev/null /dev/zero /dev/full /dev/random /dev/urandom /dev/tty /dev/ptmx 2>/dev/null

    modprobe vmw_vsock_virtio_transport
    # shuttle's cache enqueue listener binds a guest-local (CID 1) vsock
    modprobe vsock_loopback
    modprobe ext4

    # /dev/vda is the squashfs root; the first spindle volume backs /workspace
    if [ -b /dev/vdb ]; then
      mount -t ext4 /dev/vdb /workspace
      mkdir -p /workspace/repo
      chown spindle-workflow:spindle-workflow /workspace /workspace/repo
    fi

    ip link set lo up
    ip link set eth0 up
    ip addr add 10.0.3.15/24 dev eth0
    ip route add default via 10.0.3.2
    hostname -F /etc/hostname
  '';

  # busybox init: one-shot setup, then keep nix-daemon and shuttle running
  inittab = writeText "inittab" ''
    ::sysinit:/sbin/spindle-setup
    ::respawn:/usr/local/bin/nix-daemon
    ::respawn:env NIX_REMOTE=daemon /usr/bin/shuttle
    ::ctrlaltdel:/sbin/reboot
  '';

  # installed as /etc/profile.d/01-spindle.sh
  profileScript = writeText "spindle-profile" ''
    export SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt
    export GIT_SSL_CAINFO=/etc/ssl/certs/ca-certificates.crt
    export NIX_REMOTE=daemon
  '';

  # mirror nix/microvm/base.nix and nix/modules/shuttle.nix
  nixConf = writeText "nix.conf" ''
    experimental-features = nix-command flakes
    trusted-users = root
    allowed-users = spindle-workflow
    post-build-hook = /usr/libexec/spindle-post-build-hook
    !include /run/spindle/nix.conf
  '';

  apkRepositories = writeText "apk-repositories" (builtins.concatStringsSep "\n" repositories + "\n");

  # nix post-build hook: forwards built store paths to shuttle
  postBuildHook = writeText "spindle-post-build-hook" ''
    #!/bin/sh
    set -f

    if [ -z "''${OUT_PATHS:-}" ]; then
      exit 0
    fi

    # OUT_PATHS is intentionally split into individual store paths
    exec /usr/bin/shuttle enqueue-built-paths $OUT_PATHS
  '';

  # image spec written to $out/spec.json; file names are relative to $out
  imageSpecJSON = writeText "spec.json" (
    builtins.toJSON {
      inherit arch;
      bootArgs = "earlyprintk=ttyS0 console=hvc0 reboot=t panic=-1 root=/dev/vda rootfstype=squashfs modules=virtio_blk,virtio_net,virtio_console overlaytmpfs=yes init=/sbin/init";
      kernel = "kernel";
      initrd = "initrd";
      runnerType = "qemu";
      runnerConfig = {
        cpu = "host,+x2apic,-sgx";
        machine = "microvm,accel=kvm:tcg,acpi=on,mem-merge=on,pcie=off,pic=off,pit=off,rtc=on,usb=off";
        console = "hvc0";
        extraArgs = [];
      };
      memoryMiB = 2048;
      storeDisk = "store-disk";
      storeDiskType = "squashfs";
      vcpus = 2;
      shell = "/usr/local/bin/bash";
      networkInterfaces = [
        {
          type = "slirp4netns";
          id = "net0";
          mac = "02:00:00:00:10:01";
        }
      ];
      volumes = [
        {
          fsType = "ext4";
          image = "workspace.img";
          imageType = "raw";
          mountPoint = "/workspace";
          readOnly = false;
          sizeMiB = 1024 * 10;
        }
      ];
    }
  );
in
  runCommand "spindle-alpine-image-${arch}" {
    nativeBuildInputs = [squashfsTools binutils];
  } ''
    mkdir -p rootfs
    tar -xzpf ${rootfs} -C rootfs

    # kernel modules from modloop (ships its own modules.dep, no depmod needed)
    unsquashfs -q -d modloop ${modloop}
    mkdir -p rootfs/lib/modules
    cp -a modloop/modules/* rootfs/lib/modules/

    install -D -m 0755 ${shuttle}/bin/shuttle rootfs/usr/bin/shuttle
    install -D -m 0755 ${setupScript} rootfs/sbin/spindle-setup
    install -D -m 0644 ${inittab} rootfs/etc/inittab
    install -D -m 0644 ${profileScript} rootfs/etc/profile.d/01-spindle.sh
    install -D -m 0644 ${nixConf} rootfs/etc/nix/nix.conf
    install -D -m 0755 ${postBuildHook} rootfs/usr/libexec/spindle-post-build-hook

    # install dependencies
    # we only copy binaries + libexec for minimal deps so the image size doesn't
    # increase so much (if we copy the whole guestTools closure for example, it
    # doubles the disk size)
    mkdir -p rootfs/nix/store rootfs/usr/local/bin
    for pkg in ${toString guestTools}; do
      for bin in "$pkg/bin/"*; do
        [[ -e "$bin" ]] || continue
        name=$(basename "$bin")
        # we resolve symlinks as to copy the actual binaries
        if [[ -L "$bin" ]]; then
          real=$(readlink "$bin")
        else
          real="$bin"
        fi
        # handle symlinks properly
        if [[ "$real" != /nix/store* ]]; then
          ln -vsf "$real" "rootfs/usr/local/bin/$name"
        else
          cp -v "$real" "rootfs/usr/local/bin/$name"
        fi
      done
      # libexec has binaries used by packages even if statically compiled
      if [[ -d "$pkg/libexec" ]]; then
        mkdir -p "rootfs$pkg"
        cp -av "$pkg/libexec" "rootfs$pkg/"
      fi
    done
    # this is necessary for nix to work, it is not a library but nix hardcodes
    # it in its binary
    cp -rv ${publicsuffix-list} rootfs/nix/store/

    # scripts commonly hardcode #!/bin/bash. point at the copy installed into
    # /usr/local/bin above: the bash store path itself is not part of the
    # image, so linking to it would leave a dangling symlink
    ln -sf /usr/local/bin/bash rootfs/bin/bash

    echo "spindle-microvm" > rootfs/etc/hostname
    printf 'nameserver 127.0.0.1\n' > rootfs/etc/resolv.conf
    install -D -m 0644 ${apkRepositories} rootfs/etc/apk/repositories

    echo "spindle-workflow:x:970:970:spindle workflow:/workspace:/bin/sh" >> rootfs/etc/passwd
    echo "spindle-workflow:x:970:" >> rootfs/etc/group
    echo "spindle-workflow:!::0:::::" >> rootfs/etc/shadow
    mkdir -p rootfs/workspace

    # setup nix build users for the daemon
    members=""
    for i in $(seq 1 8); do
      echo "nixbld$i:x:$((30000 + i)):30000:nix build user $i:/var/empty:/sbin/nologin" >> rootfs/etc/passwd
      echo "nixbld$i:!::0:::::" >> rootfs/etc/shadow
      members="$members''${members:+,}nixbld$i"
    done
    echo "nixbld:x:30000:$members" >> rootfs/etc/group

    mkdir -p "$out"
    mksquashfs rootfs "$out/store-disk" -comp zstd -Xcompression-level 19 -noappend -no-xattrs -all-root -quiet \
      -p '/sbin/apk m 4755 0 0' # suid apk so spindle-workflow can use it without having to doas or smth
    cp ${kernel} "$out/kernel"
    cp ${initramfs} "$out/initrd"
    cp ${imageSpecJSON} "$out/spec.json"
  ''
+58
nix/pkgs/spindle-nixos-image.nix
# Packages an evaluated NixOS system (`nixosSystem`) as a spindle image
# directory containing:
#   spec.json   - machine description serialised from the system's microvm options
#   kernel      - symlink to the microvm kernel's bzImage
#   initrd      - symlink to the microvm initrd
#   store-disk  - symlink to the microvm store disk
{
  pkgs,
  lib,
  nixosSystem,
}: let
  inherit (nixosSystem.config) microvm;

  # QEMU's name for the guest platform architecture; it is written to the spec
  # and also used as the derivation name suffix.
  arch = nixosSystem.pkgs.stdenv.hostPlatform.qemuArch;

  # Everything between the "/nix/store/" prefix and the first "-" of the
  # system toplevel's output path.
  toplevelBaseName =
    lib.strings.removePrefix "/nix/store/" nixosSystem.config.system.build.toplevel.outPath;
  baseConfigHash = lib.head (lib.strings.splitString "-" toplevelBaseName);

  # Kernel parameters from the microvm options, appended to the fixed boot args.
  extraKernelParams = lib.concatStringsSep " " microvm.kernelParams;

  # Every interface is emitted with the "slirp4netns" type; only id and mac are
  # taken from the microvm interface definition.
  mkInterface = interface: {
    type = "slirp4netns";
    inherit (interface) id mac;
  };

  # The spec calls the volume size `sizeMiB`; the microvm option is `size`.
  mkVolume = volume: {
    inherit (volume) fsType image imageType mountPoint readOnly;
    sizeMiB = volume.size;
  };

  imageSpecJSON = pkgs.writeText "spec.json" (
    builtins.toJSON {
      inherit arch baseConfigHash;
      inherit (microvm) storeDiskType;

      bootArgs = "earlyprintk=ttyS0 console=hvc0 reboot=t panic=-1 ${extraKernelParams}";
      kernel = "kernel";
      initrd = "initrd";
      storeDisk = "store-disk";
      shell = "/run/current-system/sw/bin/bash";

      runnerType = "qemu";
      runnerConfig = {
        cpu = "host,+x2apic,-sgx";
        machine = "microvm,accel=kvm:tcg,acpi=on,mem-merge=on,pcie=off,pic=off,pit=off,rtc=on,usb=off";
        console = "hvc0";
        extraArgs = [];
      };

      memoryMiB = microvm.mem;
      vcpus = microvm.vcpu;

      networkInterfaces = map mkInterface microvm.interfaces;
      volumes = map mkVolume microvm.volumes;
    }
  );
in
  pkgs.runCommand "spindle-nixos-image-${arch}" {} ''
    mkdir -p "$out"
    cp ${imageSpecJSON} "$out/spec.json"
    ln -s ${microvm.kernel}/bzImage "$out/kernel"
    ln -s ${microvm.initrdPath} "$out/initrd"
    ln -s ${microvm.storeDisk} "$out/store-disk"
  ''
+50 -4
nix/vm.nix
··· 4 4 hostSystem, 5 5 self, 6 6 }: let 7 + lib = nixpkgs.lib; 8 + 7 9 envVar = name: let 8 10 var = builtins.getEnv name; 9 11 in ··· 19 21 20 22 plcUrl = envVarOr "TANGLED_VM_PLC_URL" "https://plc.directory"; 21 23 jetstream = envVarOr "TANGLED_VM_JETSTREAM_ENDPOINT" "wss://jetstream1.us-west.bsky.network/subscribe"; 24 + 25 + checkFile = value: path: 26 + if builtins.pathExists path 27 + then lib.hasPrefix value (builtins.readFile path) 28 + else false; 29 + _nestedVirt = 30 + (checkFile "1" /sys/module/kvm_amd/parameters/nested) 31 + || (checkFile "Y" /sys/module/kvm_intel/parameters/nested); 32 + nestedVirtWarning = '' 33 + KVM nested virtualisation is not enabled on this host. 34 + You should enable it if you can for better performance when testing the QEMU spindle engine! 35 + ''; 36 + nestedVirt = lib.warnIf (!_nestedVirt) nestedVirtWarning _nestedVirt; 22 37 in 23 - nixpkgs.lib.nixosSystem { 38 + lib.nixosSystem { 24 39 inherit system; 25 40 modules = [ 26 41 self.nixosModules.knot ··· 36 51 host.pkgs = import nixpkgs {system = hostSystem;}; 37 52 38 53 graphics = false; 39 - memorySize = 2048; 40 - diskSize = 10 * 1024; 54 + memorySize = 3072; 55 + diskSize = 20 * 1024; 41 56 cores = 2; 57 + qemu.options = lib.optionals nestedVirt ["-enable-kvm" "-cpu host"]; 58 + 42 59 forwardPorts = [ 43 60 # ssh 44 61 { ··· 101 118 }; 102 119 }; 103 120 }; 121 + systemd.tmpfiles.rules = [ 122 + "L+ /var/lib/spindle/images/nixos-x86_64 - - - - ${self.packages.${system}.spindle-nixos-image}" 123 + "L+ /var/lib/spindle/images/nixos - - - - /var/lib/spindle/images/nixos-x86_64" 124 + "L+ /var/lib/spindle/images/alpine-x86_64 - - - - ${self.packages.${system}.spindle-alpine-image}" 125 + "L+ /var/lib/spindle/images/alpine - - - - /var/lib/spindle/images/alpine-x86_64" 126 + ]; 104 127 # This is fine because any and all ports that are forwarded to host are explicitly marked above, we don't need a separate guest firewall 105 128 networking.firewall.enable = false; 106 129 
services.timesyncd.enable = lib.mkForce true; ··· 141 164 142 165 pipelines = { 143 166 logBucket = envVarOr "SPINDLE_S3_LOG_BUCKET" ""; 167 + microvm = { 168 + enableKVM = nestedVirt; 169 + }; 170 + }; 171 + 172 + cache = { 173 + readUrls = ["http://127.0.0.1:8501"]; 174 + trustedPublicKeys = ["cache.local:F7YqpMzuBdILYd/v+wMZN2YKxCzliXQyFmeezOxw7rU="]; 175 + uploadUrl = "http://127.0.0.1:8501/upload"; 144 176 }; 145 177 }; 178 + services.ncps = { 179 + enable = true; 180 + cache = { 181 + allowPutVerb = true; 182 + allowDeleteVerb = true; 183 + hostName = "cache.local"; 184 + secretKeyPath = pkgs.writeText "ncps-secret-key" "cache.local:hay0+jvBNguou2tNt19FvrBCogHwHc+mqQe3bww5ZX4XtiqkzO4F0gth3+/7Axk3ZgrELOWJdDIWZ57M7HDutQ=="; 185 + upstream = { 186 + urls = ["https://cache.nixos.org"]; 187 + publicKeys = ["cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY="]; 188 + }; 189 + }; 190 + server.addr = "127.0.0.1:8501"; 191 + }; 146 192 services.postgresql = { 147 193 enable = true; 148 194 package = pkgs.postgresql_14; ··· 191 237 }; 192 238 in { 193 239 knot = mkDataSyncScripts "/mnt/knot-data" config.services.tangled.knot.stateDir; 194 - spindle = mkDataSyncScripts "/mnt/spindle-data" (builtins.dirOf config.services.tangled.spindle.server.dbPath); 240 + spindle = mkDataSyncScripts "/mnt/spindle-data" (dirOf config.services.tangled.spindle.server.dbPath); 195 241 knotmirror.after = ["postgresql.target"]; 196 242 tap-knotmirror.after = ["postgresql.target"]; 197 243 };
+22
shuttle/Cargo.toml
··· 1 + [package] 2 + name = "shuttle" 3 + version = "0.1.0" 4 + edition.workspace = true 5 + license.workspace = true 6 + rust-version.workspace = true 7 + 8 + [dependencies] 9 + anyhow = "1" 10 + base64 = "0.22" 11 + nix = { version = "0.31", features = ["fs", "process", "reboot", "signal", "user"] } 12 + prost = "0.14" 13 + prost-reflect = "0.16" 14 + prost-protovalidate = "0.3" 15 + once_cell = "1" 16 + serde = { version = "1", features = ["derive"] } 17 + serde_json = "1" 18 + tempfile = "3" 19 + tokio = { workspace = true, features = ["fs", "net", "process"] } 20 + tokio-vsock = "0.7.2" 21 + tracing = "0.1" 22 + tracing-subscriber = { version = "0.3", features = ["env-filter"] }
+1
shuttle/README.md
··· 1 + Shuttle is a tiny agent for microVM guests that handles communication between the host and the guest.
+146
shuttle/src/activation.rs
··· 1 + use crate::command::{self, Spec, run_capture}; 2 + use crate::nix_config::{SPINDLE_RUN_DIR, nix_executable}; 3 + use crate::protocol::{self, Message, v1}; 4 + use anyhow::{Context, Result}; 5 + use std::fs; 6 + use std::path::{Path, PathBuf}; 7 + use std::time::Duration; 8 + use tokio::sync::mpsc::Sender; 9 + use tracing::info; 10 + 11 + const USER_CONFIG_DIR: &str = "/run/spindle/user-config"; 12 + 13 + pub async fn run(id: String, req: v1::ActivateConfig, out: Sender<Message>) { 14 + let config_key = req.config_key.clone(); 15 + let result = activate(&req).await; 16 + let msg = Message { 17 + id, 18 + activate_config_result: Some(v1::ActivateConfigResult { 19 + config_key, 20 + toplevel: (result.as_ref()) 21 + .map(|p| p.to_string_lossy().into_owned()) 22 + .unwrap_or_default(), 23 + error: protocol::error_or_empty(result.err().map(|e| format!("{e:#}"))), 24 + }), 25 + ..Default::default() 26 + }; 27 + let _ = out.send(msg).await; 28 + } 29 + 30 + async fn activate(req: &v1::ActivateConfig) -> Result<PathBuf> { 31 + let need_build = req.toplevel.is_empty(); 32 + let timeout = (req.timeout_seconds > 0) 33 + .then(|| Duration::from_secs(u64::from(req.timeout_seconds))) 34 + .or_else(|| need_build.then_some(Duration::from_secs(10 * 60))) 35 + .unwrap_or(Duration::from_secs(2 * 60)); 36 + 37 + let toplevel = if need_build { 38 + build_toplevel(req, timeout).await? 39 + } else { 40 + realise_toplevel(&req.toplevel, timeout).await? 
41 + }; 42 + 43 + if !toplevel.starts_with("/nix/store/") { 44 + anyhow::bail!("config toplevel {toplevel:?} is not a nix store path"); 45 + } 46 + 47 + switch_to_configuration(&toplevel, timeout).await?; 48 + info!( 49 + config_key = %req.config_key, 50 + base_config_hash = %req.base_config_hash, 51 + ?toplevel, 52 + "activated NixOS config" 53 + ); 54 + Ok(toplevel) 55 + } 56 + 57 + async fn build_toplevel(req: &v1::ActivateConfig, timeout: Duration) -> Result<PathBuf> { 58 + let user_config = (req.user_config.is_empty()) 59 + .then_some("{}") 60 + .unwrap_or_else(|| &req.user_config); 61 + 62 + info!("writing user config to {USER_CONFIG_DIR}/config.json"); 63 + write_user_config(user_config).context("write user config")?; 64 + 65 + info!("running nix build command for user config toplevel..."); 66 + let output = run_capture( 67 + Spec::new(nix_executable()) 68 + .args([ 69 + "build", 70 + "--no-link", 71 + "--show-trace", 72 + "--json", 73 + "--file", 74 + "/etc/spindle/nixos/default.nix", 75 + ]) 76 + .cwd(SPINDLE_RUN_DIR) 77 + .timeout(timeout), 78 + ) 79 + .await?; 80 + 81 + if !output.success() { 82 + anyhow::bail!( 83 + "nix config build failed: exit={} error={:?} output={}", 84 + output.exit.exit_code, 85 + output.exit.error, 86 + output.combined_lossy(), 87 + ); 88 + } 89 + 90 + #[derive(Debug, serde::Deserialize)] 91 + struct NixBuildResult { 92 + outputs: NixBuildOutputs, 93 + } 94 + #[derive(Debug, serde::Deserialize)] 95 + struct NixBuildOutputs { 96 + out: PathBuf, 97 + } 98 + let [result] = serde_json::from_slice::<[NixBuildResult; 1]>(&output.stdout) 99 + .context("parse nix build --json output")?; 100 + Ok(result.outputs.out) 101 + } 102 + 103 + fn write_user_config(user_config: &str) -> Result<()> { 104 + fs::create_dir_all(USER_CONFIG_DIR).with_context(|| format!("create {USER_CONFIG_DIR}"))?; 105 + 106 + let config_path = format!("{USER_CONFIG_DIR}/config.json"); 107 + fs::write(&config_path, user_config).with_context(|| format!("write 
{config_path}"))?; 108 + Ok(()) 109 + } 110 + 111 + async fn realise_toplevel(toplevel: &str, timeout: Duration) -> Result<PathBuf> { 112 + if !toplevel.starts_with("/nix/store/") { 113 + anyhow::bail!("cached config toplevel {toplevel:?} is not a nix store path"); 114 + } 115 + let output = command::run_capture( 116 + Spec::new(nix_executable()) 117 + .args(["build", "--no-link", "--show-trace", toplevel]) 118 + .timeout(timeout), 119 + ) 120 + .await?; 121 + if !output.success() { 122 + anyhow::bail!( 123 + "realise cached config failed: exit={} error={:?} output={}", 124 + output.exit.exit_code, 125 + output.exit.error, 126 + output.combined_lossy(), 127 + ); 128 + } 129 + 130 + Ok(PathBuf::from(toplevel)) 131 + } 132 + 133 + async fn switch_to_configuration(toplevel: &Path, timeout: Duration) -> Result<()> { 134 + info!("switching to new configuration: {:?}", toplevel); 135 + let switch = toplevel.join("bin/switch-to-configuration"); 136 + let output = run_capture(Spec::new(switch).args(["test"]).timeout(timeout)).await?; 137 + if !output.success() { 138 + anyhow::bail!( 139 + "switch-to-configuration test failed: exit={} error={:?} output={}", 140 + output.exit.exit_code, 141 + output.exit.error, 142 + output.combined_lossy(), 143 + ); 144 + } 145 + Ok(()) 146 + }
+724
shuttle/src/cache/mod.rs
··· 1 + use crate::command::{self, Spec}; 2 + use crate::nix_config::{SPINDLE_RUN_DIR, clean_store_paths, nix_executable}; 3 + use crate::protocol::{Message, v1}; 4 + use anyhow::{Context, Result}; 5 + use nix::unistd::{Group, chown}; 6 + use serde::de::DeserializeOwned; 7 + use serde::{Deserialize, Serialize}; 8 + use std::fmt::{self, Write as FmtWrite}; 9 + use std::fs::{self, File, OpenOptions}; 10 + use std::io::{self, Read}; 11 + use std::net::Shutdown; 12 + use std::os::unix::fs::OpenOptionsExt; 13 + use std::sync::{Arc, Mutex}; 14 + use std::time::Duration; 15 + use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader}; 16 + use tokio::sync::{Semaphore, mpsc, oneshot, watch}; 17 + use tokio::task::{JoinError, JoinHandle, JoinSet}; 18 + use tokio_vsock::{VMADDR_CID_LOCAL, VsockAddr, VsockListener, VsockStream}; 19 + use tracing::{info, warn}; 20 + 21 + mod read_proxy; 22 + mod write_proxy; 23 + 24 + pub use read_proxy::ReadCacheProxy; 25 + pub use write_proxy::WriteCacheProxy; 26 + 27 + const UPLOAD_QUEUE_CAPACITY: usize = 128; 28 + const CONNECTION_WORKERS: usize = 4; 29 + const CACHE_ENQUEUE_IO_TIMEOUT: Duration = Duration::from_secs(2); 30 + const DEFAULT_CACHE_ENQUEUE_PORT: u32 = 10241; 31 + const SHUTTLE_CACHE_ENQUEUE_PORT_ENV: &str = "SHUTTLE_CACHE_VSOCK_PORT"; 32 + const NIX_BUILD_GROUP: &str = "nixbld"; 33 + const SPINDLE_HOOK_TOKEN: &str = "/run/spindle/hook-token"; 34 + 35 + #[derive(Clone, Debug, Default)] 36 + pub struct CacheStats { 37 + pub pending: u32, 38 + pub active: u32, 39 + pub uploaded: u32, 40 + pub failed: u32, 41 + pub last_error: Option<String>, 42 + } 43 + 44 + #[derive(Debug, Default)] 45 + struct CacheState { 46 + stats: CacheStats, 47 + enqueue_active: u32, 48 + stopped: bool, 49 + } 50 + 51 + impl CacheState { 52 + fn snapshot(&self) -> CacheSnapshot { 53 + CacheSnapshot { 54 + stats: self.stats.clone(), 55 + enqueue_active: self.enqueue_active, 56 + stopped: self.stopped, 57 + } 58 + } 59 + } 60 + 61 + #[derive(Clone, Debug, 
Default)] 62 + struct CacheSnapshot { 63 + stats: CacheStats, 64 + enqueue_active: u32, 65 + stopped: bool, 66 + } 67 + 68 + impl CacheSnapshot { 69 + fn is_idle(&self) -> bool { 70 + self.stats.pending == 0 && self.stats.active == 0 && self.enqueue_active == 0 71 + } 72 + } 73 + 74 + #[derive(Clone)] 75 + pub struct CacheUploadManager { 76 + inner: Arc<CacheUploadInner>, 77 + } 78 + 79 + struct CacheUploadInner { 80 + cmd_tx: mpsc::Sender<Cmd>, 81 + stats_rx: watch::Receiver<CacheSnapshot>, 82 + handles: Mutex<Vec<JoinHandle<()>>>, 83 + } 84 + 85 + struct UploadJob { 86 + paths: Vec<String>, 87 + count: u32, 88 + } 89 + 90 + enum Cmd { 91 + EnqueueStarted, 92 + EnqueueFinished, 93 + Enqueue { 94 + paths: Vec<String>, 95 + reply: oneshot::Sender<Result<usize, String>>, 96 + }, 97 + UploadStarted { 98 + count: u32, 99 + }, 100 + UploadFinished { 101 + count: u32, 102 + error: Option<String>, 103 + }, 104 + UploadWorkerStopped, 105 + Stop, 106 + } 107 + 108 + impl CacheUploadManager { 109 + pub async fn start(upload_url: &str, event_tx: mpsc::Sender<Message>) -> Result<Option<Self>> { 110 + if upload_url.is_empty() { 111 + // nothing to upload to, so don't require the guest-local vsock 112 + // listener (vsock_loopback) or the nix post-build hook 113 + info!("no cache upload url configured, cache uploads disabled"); 114 + return Ok(None); 115 + } 116 + let token = create_hook_token().context("create cache hook token")?; 117 + let port = cache_enqueue_port(); 118 + let listener = VsockListener::bind(VsockAddr::new(VMADDR_CID_LOCAL, port)) 119 + .with_context(|| format!("listen on guest-local vsock port {port}"))?; 120 + 121 + let (cmd_tx, cmd_rx) = mpsc::channel::<Cmd>(UPLOAD_QUEUE_CAPACITY); 122 + let (upload_tx, upload_rx) = mpsc::channel::<UploadJob>(UPLOAD_QUEUE_CAPACITY); 123 + let (stats_tx, stats_rx) = watch::channel(CacheSnapshot::default()); 124 + 125 + let mut handles = Vec::with_capacity(3); 126 + 127 + handles.push(tokio::spawn(async move { 128 + 
cache_manager_loop(cmd_rx, upload_tx, stats_tx).await; 129 + })); 130 + 131 + let upload_cmd_tx = cmd_tx.clone(); 132 + let upload_url = upload_url.to_owned(); 133 + handles.push(tokio::spawn(async move { 134 + upload_loop(upload_rx, upload_cmd_tx, upload_url).await; 135 + })); 136 + 137 + let accept_cmd_tx = cmd_tx.clone(); 138 + handles.push(tokio::spawn(async move { 139 + accept_loop(listener, token, event_tx, accept_cmd_tx).await; 140 + })); 141 + 142 + info!( 143 + port, 144 + workers = CONNECTION_WORKERS, 145 + "cache upload queue ready" 146 + ); 147 + let inner = Arc::new(CacheUploadInner { 148 + cmd_tx, 149 + stats_rx, 150 + handles: Mutex::new(handles), 151 + }); 152 + Ok(Some(Self { inner })) 153 + } 154 + 155 + pub async fn drain(&self, timeout: Option<Duration>) -> CacheStats { 156 + let mut stats_rx = self.inner.stats_rx.clone(); 157 + let wait = async { 158 + loop { 159 + let snapshot = stats_rx.borrow_and_update().clone(); 160 + if snapshot.is_idle() || snapshot.stopped { 161 + return snapshot.stats; 162 + } 163 + if stats_rx.changed().await.is_err() { 164 + let mut stats = stats_rx.borrow().stats.clone(); 165 + stats.last_error = Some("cache manager stopped".to_owned()); 166 + return stats; 167 + } 168 + } 169 + }; 170 + 171 + match timeout { 172 + Some(timeout) => match tokio::time::timeout(timeout, wait).await { 173 + Ok(stats) => stats, 174 + Err(_) => { 175 + let mut stats = self.inner.stats_rx.borrow().stats.clone(); 176 + stats.last_error = Some("cache drain timed out".to_owned()); 177 + stats 178 + } 179 + }, 180 + None => wait.await, 181 + } 182 + } 183 + } 184 + 185 + impl Drop for CacheUploadInner { 186 + fn drop(&mut self) { 187 + let _ = self.cmd_tx.try_send(Cmd::Stop); 188 + if let Ok(mut handles) = self.handles.lock() { 189 + for handle in handles.drain(..) 
{ 190 + handle.abort(); 191 + } 192 + } 193 + let _ = fs::remove_file(SPINDLE_HOOK_TOKEN); 194 + } 195 + } 196 + 197 + #[derive(Debug, Deserialize, Serialize)] 198 + struct EnqueueBuiltPathsRequest { 199 + token: String, 200 + paths: Vec<String>, 201 + } 202 + 203 + #[derive(Debug, Deserialize, Serialize)] 204 + struct EnqueueBuiltPathsResponse { 205 + queued: usize, 206 + #[serde(default, skip_serializing_if = "String::is_empty")] 207 + error: String, 208 + } 209 + 210 + #[derive(Debug)] 211 + enum JsonLineError { 212 + Empty, 213 + TimedOut, 214 + Io(io::Error), 215 + Json(serde_json::Error), 216 + } 217 + 218 + impl JsonLineError { 219 + fn enqueue_request_message(self) -> String { 220 + match self { 221 + Self::Empty => "empty cache enqueue request".to_owned(), 222 + Self::TimedOut => "cache enqueue read timed out".to_owned(), 223 + Self::Io(error) => error.to_string(), 224 + Self::Json(error) => error.to_string(), 225 + } 226 + } 227 + } 228 + 229 + impl fmt::Display for JsonLineError { 230 + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 231 + match self { 232 + Self::Empty => f.write_str("empty message"), 233 + Self::TimedOut => f.write_str("timed out"), 234 + Self::Io(error) => error.fmt(f), 235 + Self::Json(error) => error.fmt(f), 236 + } 237 + } 238 + } 239 + 240 + async fn cache_manager_loop( 241 + mut cmd_rx: mpsc::Receiver<Cmd>, 242 + upload_tx: mpsc::Sender<UploadJob>, 243 + stats_tx: watch::Sender<CacheSnapshot>, 244 + ) { 245 + let mut state = CacheState::default(); 246 + 247 + let publish_state = |state: &CacheState| { 248 + let _ = stats_tx.send(state.snapshot()); 249 + }; 250 + 251 + while let Some(command) = cmd_rx.recv().await { 252 + match command { 253 + Cmd::EnqueueStarted => { 254 + state.enqueue_active += 1; 255 + publish_state(&state); 256 + } 257 + Cmd::EnqueueFinished => { 258 + decrement_counter(&mut state.enqueue_active, 1, "cache enqueue active"); 259 + publish_state(&state); 260 + } 261 + Cmd::Enqueue { paths, reply } => 
{ 262 + let result = enqueue_upload_job(&mut state, &upload_tx, paths); 263 + publish_state(&state); 264 + let _ = reply.send(result); 265 + } 266 + Cmd::UploadStarted { count } => { 267 + decrement_counter(&mut state.stats.pending, count, "cache pending uploads"); 268 + state.stats.active += count; 269 + publish_state(&state); 270 + } 271 + Cmd::UploadFinished { count, error } => { 272 + decrement_counter(&mut state.stats.active, count, "cache active uploads"); 273 + if let Some(error) = error { 274 + state.stats.failed += count; 275 + state.stats.last_error = Some(error); 276 + } else { 277 + state.stats.uploaded += count; 278 + } 279 + publish_state(&state); 280 + } 281 + Cmd::UploadWorkerStopped => { 282 + state.stopped = true; 283 + publish_state(&state); 284 + } 285 + Cmd::Stop => { 286 + state.stopped = true; 287 + publish_state(&state); 288 + break; 289 + } 290 + } 291 + } 292 + 293 + state.stopped = true; 294 + publish_state(&state); 295 + } 296 + 297 + fn enqueue_upload_job( 298 + state: &mut CacheState, 299 + upload_tx: &mpsc::Sender<UploadJob>, 300 + paths: Vec<String>, 301 + ) -> Result<usize, String> { 302 + if state.stopped { 303 + return Err("cache manager stopped".to_owned()); 304 + } 305 + 306 + let count = paths.len() as u32; 307 + if count == 0 { 308 + return Ok(0); 309 + } 310 + 311 + match upload_tx.try_send(UploadJob { paths, count }) { 312 + Ok(()) => { 313 + state.stats.pending += count; 314 + Ok(count as usize) 315 + } 316 + Err(mpsc::error::TrySendError::Full(_)) => Err("cache upload queue is full".to_owned()), 317 + Err(mpsc::error::TrySendError::Closed(_)) => { 318 + state.stopped = true; 319 + Err("cache upload worker stopped".to_owned()) 320 + } 321 + } 322 + } 323 + 324 + fn decrement_counter(counter: &mut u32, count: u32, name: &'static str) { 325 + match counter.checked_sub(count) { 326 + Some(value) => *counter = value, 327 + None => { 328 + warn!(name, current = *counter, count, "cache counter underflow"); 329 + *counter = 0; 330 
+ } 331 + } 332 + } 333 + 334 + async fn upload_loop( 335 + mut upload_rx: mpsc::Receiver<UploadJob>, 336 + cmd_tx: mpsc::Sender<Cmd>, 337 + upload_url: String, 338 + ) { 339 + while let Some(job) = upload_rx.recv().await { 340 + let cmd = Cmd::UploadStarted { count: job.count }; 341 + if cmd_tx.send(cmd).await.is_err() { 342 + break; 343 + } 344 + 345 + let error = upload_paths(&upload_url, &job.paths) 346 + .await 347 + .err() 348 + .map(|error| error.to_string()); 349 + 350 + let cmd = Cmd::UploadFinished { 351 + count: job.count, 352 + error, 353 + }; 354 + if cmd_tx.send(cmd).await.is_err() { 355 + break; 356 + } 357 + } 358 + 359 + let _ = cmd_tx.send(Cmd::UploadWorkerStopped).await; 360 + } 361 + 362 + // runs nix copy against the write cache proxy, which goes to the spindle 363 + // and spindle will then forward the request to the actual binary cache 364 + async fn upload_paths(upload_url: &str, paths: &[String]) -> Result<()> { 365 + fn add_query_param(url: &str, key: &str, value: &str) -> String { 366 + let separator = url.contains('?').then_some('&').unwrap_or('?'); 367 + format!("{}{}{}={}", url, separator, key, value) 368 + } 369 + 370 + if paths.is_empty() || upload_url.is_empty() { 371 + return Ok(()); 372 + } 373 + 374 + // we use zstd 3 because its the best usually. 
it is faster than no 375 + // compression also because of IO savings 376 + let dest_url = add_query_param(upload_url, "compression", "zstd"); 377 + let dest_url = add_query_param(&dest_url, "compression-level", "3"); 378 + let dest_url = add_query_param(&dest_url, "parallel-compression", "true"); 379 + 380 + let spec = Spec::new(nix_executable()) 381 + .args(["copy", "--to", &dest_url]) 382 + .args(paths.iter().cloned()) 383 + .timeout(Duration::from_secs(10 * 60)); 384 + 385 + let output = command::run_capture(spec).await.context("run nix copy")?; 386 + if !output.success() { 387 + anyhow::bail!( 388 + "nix copy failed: exit={} error={:?} output={}", 389 + output.exit.exit_code, 390 + output.exit.error, 391 + output.combined_lossy(), 392 + ); 393 + } 394 + 395 + info!(paths = paths.len(), %upload_url, "uploaded cache paths"); 396 + Ok(()) 397 + } 398 + 399 + async fn accept_loop( 400 + listener: VsockListener, 401 + token: String, 402 + event_tx: mpsc::Sender<Message>, 403 + cmd_tx: mpsc::Sender<Cmd>, 404 + ) { 405 + let permits = Arc::new(Semaphore::new(CONNECTION_WORKERS)); 406 + let mut tasks = JoinSet::new(); 407 + loop { 408 + tokio::select! 
{ 409 + accepted = listener.accept() => match accepted { 410 + Ok((conn, _addr)) => { 411 + let Ok(permit) = permits.clone().try_acquire_owned() else { 412 + tasks.spawn(async move { 413 + let mut conn = conn; 414 + write_enqueue_response( 415 + &mut conn, 416 + 0, 417 + Some("cache enqueue workers are busy".to_owned()), 418 + ) 419 + .await; 420 + }); 421 + warn!("cache enqueue dropped because workers are busy"); 422 + continue; 423 + }; 424 + 425 + let worker_cmd_tx = cmd_tx.clone(); 426 + if let Err(error) = start_enqueue_request(&worker_cmd_tx) { 427 + tasks.spawn(async move { 428 + let mut conn = conn; 429 + write_enqueue_response(&mut conn, 0, Some(error)).await; 430 + }); 431 + continue; 432 + } 433 + 434 + let worker_token = token.clone(); 435 + let worker_event_tx = event_tx.clone(); 436 + tasks.spawn(async move { 437 + let _permit = permit; 438 + handle_enqueue_conn( 439 + conn, 440 + &worker_token, 441 + &worker_event_tx, 442 + &worker_cmd_tx, 443 + ) 444 + .await; 445 + let _ = worker_cmd_tx.send(Cmd::EnqueueFinished).await; 446 + }); 447 + } 448 + Err(error) => { 449 + if error.kind() != io::ErrorKind::Interrupted { 450 + warn!(%error, "cache enqueue accept failed"); 451 + } 452 + } 453 + }, 454 + Some(result) = tasks.join_next(), if !tasks.is_empty() => { 455 + log_enqueue_task_result(result); 456 + } 457 + } 458 + } 459 + } 460 + 461 + fn log_enqueue_task_result(result: Result<(), JoinError>) { 462 + if let Err(error) = result { 463 + warn!(%error, "cache enqueue task failed"); 464 + } 465 + } 466 + 467 + async fn handle_enqueue_conn( 468 + mut conn: VsockStream, 469 + expected_token: &str, 470 + event_tx: &mpsc::Sender<Message>, 471 + cmd_tx: &mpsc::Sender<Cmd>, 472 + ) { 473 + let req: EnqueueBuiltPathsRequest = match read_enqueue_request(&mut conn).await { 474 + Ok(req) => req, 475 + Err(error) => { 476 + write_enqueue_response(&mut conn, 0, Some(error)).await; 477 + return; 478 + } 479 + }; 480 + 481 + if req.token != expected_token { 482 + 
write_enqueue_response(&mut conn, 0, Some("invalid cache enqueue token".to_owned())).await; 483 + return; 484 + } 485 + 486 + match enqueue_paths(cmd_tx, req.paths).await { 487 + Ok(queued) => { 488 + send_built_paths_event(event_tx, queued.event_paths).await; 489 + write_enqueue_response(&mut conn, queued.count, None).await; 490 + } 491 + Err(error) => write_enqueue_response(&mut conn, 0, Some(error)).await, 492 + } 493 + } 494 + 495 + fn start_enqueue_request(cmd_tx: &mpsc::Sender<Cmd>) -> Result<(), String> { 496 + match cmd_tx.try_send(Cmd::EnqueueStarted) { 497 + Ok(()) => Ok(()), 498 + Err(mpsc::error::TrySendError::Full(_)) => Err("cache upload queue is full".to_owned()), 499 + Err(mpsc::error::TrySendError::Closed(_)) => Err("cache upload worker stopped".to_owned()), 500 + } 501 + } 502 + 503 + async fn read_enqueue_request(conn: &mut VsockStream) -> Result<EnqueueBuiltPathsRequest, String> { 504 + read_json_line(conn) 505 + .await 506 + .map_err(JsonLineError::enqueue_request_message) 507 + } 508 + 509 + async fn read_json_line<T>(conn: &mut VsockStream) -> Result<T, JsonLineError> 510 + where 511 + T: DeserializeOwned, 512 + { 513 + let mut data = Vec::new(); 514 + let mut reader = BufReader::new(conn); 515 + let bytes_read = tokio::time::timeout( 516 + CACHE_ENQUEUE_IO_TIMEOUT, 517 + reader.read_until(b'\n', &mut data), 518 + ) 519 + .await 520 + .map_err(|_| JsonLineError::TimedOut)? 
521 + .map_err(JsonLineError::Io)?; 522 + 523 + if bytes_read == 0 { 524 + return Err(JsonLineError::Empty); 525 + } 526 + 527 + serde_json::from_slice(&data).map_err(JsonLineError::Json) 528 + } 529 + 530 + struct QueuedPaths { 531 + count: usize, 532 + event_paths: Vec<String>, 533 + } 534 + 535 + async fn enqueue_paths( 536 + cmd_tx: &mpsc::Sender<Cmd>, 537 + paths: Vec<String>, 538 + ) -> Result<QueuedPaths, String> { 539 + let paths = clean_store_paths(&paths); 540 + let event_paths = paths.clone(); 541 + if paths.is_empty() { 542 + return Ok(QueuedPaths { 543 + count: 0, 544 + event_paths, 545 + }); 546 + } 547 + 548 + let (reply, queued) = oneshot::channel(); 549 + match cmd_tx.try_send(Cmd::Enqueue { paths, reply }) { 550 + Ok(()) => match queued.await { 551 + Ok(Ok(count)) => Ok(QueuedPaths { count, event_paths }), 552 + Ok(Err(error)) => Err(error), 553 + Err(_) => Err("cache upload worker stopped".to_owned()), 554 + }, 555 + Err(mpsc::error::TrySendError::Full(_)) => Err("cache upload queue is full".to_owned()), 556 + Err(mpsc::error::TrySendError::Closed(_)) => Err("cache upload worker stopped".to_owned()), 557 + } 558 + } 559 + 560 + async fn send_built_paths_event(event_tx: &mpsc::Sender<Message>, paths: Vec<String>) { 561 + if paths.is_empty() { 562 + return; 563 + } 564 + 565 + let msg = Message { 566 + id: "built-paths".to_owned(), 567 + built_paths: Some(v1::BuiltPaths { 568 + paths, 569 + reason: "post_build_hook".to_owned(), 570 + }), 571 + ..Default::default() 572 + }; 573 + let _ = event_tx.send(msg).await; 574 + } 575 + 576 + async fn write_enqueue_response(conn: &mut VsockStream, queued: usize, error: Option<String>) { 577 + let response = EnqueueBuiltPathsResponse { 578 + queued, 579 + error: error.unwrap_or_default(), 580 + }; 581 + let _ = write_json_line(conn, &response).await; 582 + } 583 + 584 + async fn write_json_line<T>(conn: &mut VsockStream, value: &T) -> Result<(), JsonLineError> 585 + where 586 + T: Serialize + ?Sized, 587 + { 
588 + let data = serde_json::to_vec(value).map_err(JsonLineError::Json)?; 589 + tokio::time::timeout(CACHE_ENQUEUE_IO_TIMEOUT, async { 590 + conn.write_all(&data).await?; 591 + conn.write_all(b"\n").await?; 592 + VsockStream::shutdown(conn, Shutdown::Write) 593 + }) 594 + .await 595 + .map_err(|_| JsonLineError::TimedOut)? 596 + .map_err(JsonLineError::Io) 597 + } 598 + 599 + // we use a loopback vsock here since its better than having to do the whole http song and dance! 600 + pub async fn enqueue_built_paths(paths: &[String]) { 601 + let paths = clean_store_paths(paths); 602 + if paths.is_empty() { 603 + return; 604 + } 605 + 606 + let token = match read_hook_token() { 607 + Ok(token) => token, 608 + Err(_) => return, 609 + }; 610 + 611 + if token.is_empty() { 612 + return; 613 + } 614 + 615 + let mut conn = 616 + match VsockStream::connect(VsockAddr::new(VMADDR_CID_LOCAL, cache_enqueue_port())).await { 617 + Ok(conn) => conn, 618 + Err(error) => { 619 + warn!(paths = paths.len(), %error, "cache enqueue unavailable"); 620 + return; 621 + } 622 + }; 623 + 624 + let request = EnqueueBuiltPathsRequest { token, paths }; 625 + match write_json_line(&mut conn, &request).await { 626 + Ok(()) => {} 627 + Err(JsonLineError::Json(error)) => { 628 + warn!(%error, "cache enqueue encode failed"); 629 + return; 630 + } 631 + Err(JsonLineError::TimedOut) => { 632 + warn!("cache enqueue write timed out"); 633 + return; 634 + } 635 + Err(error) => { 636 + warn!(%error, "cache enqueue write failed"); 637 + return; 638 + } 639 + } 640 + 641 + let response: EnqueueBuiltPathsResponse = match read_json_line(&mut conn).await { 642 + Ok(response) => response, 643 + Err(JsonLineError::Empty) => { 644 + warn!("cache enqueue ack was empty"); 645 + return; 646 + } 647 + Err(JsonLineError::TimedOut) => { 648 + warn!("cache enqueue ack timed out"); 649 + return; 650 + } 651 + Err(error) => { 652 + warn!(%error, "cache enqueue ack failed"); 653 + return; 654 + } 655 + }; 656 + 657 + if 
!response.error.is_empty() { 658 + warn!(error = %response.error, "cache enqueue rejected"); 659 + return; 660 + } 661 + 662 + info!(queued = response.queued, "cache paths enqueued"); 663 + } 664 + 665 + fn cache_enqueue_port() -> u32 { 666 + std::env::var(SHUTTLE_CACHE_ENQUEUE_PORT_ENV) 667 + .ok() 668 + .and_then(|value| value.parse().ok()) 669 + .unwrap_or(DEFAULT_CACHE_ENQUEUE_PORT) 670 + } 671 + 672 + fn create_hook_token() -> Result<String> { 673 + use std::io::Write; 674 + 675 + fs::create_dir_all(SPINDLE_RUN_DIR).with_context(|| format!("create {SPINDLE_RUN_DIR}"))?; 676 + let token = random_token().context("generate hook token")?; 677 + let mut file = OpenOptions::new() 678 + .create(true) 679 + .truncate(true) 680 + .write(true) 681 + .mode(0o640) 682 + .open(SPINDLE_HOOK_TOKEN) 683 + .with_context(|| format!("create {SPINDLE_HOOK_TOKEN}"))?; 684 + allow_nix_build_group(SPINDLE_HOOK_TOKEN)?; 685 + file.write_all(token.as_bytes()) 686 + .with_context(|| format!("write {SPINDLE_HOOK_TOKEN}"))?; 687 + file.write_all(b"\n") 688 + .with_context(|| format!("write {SPINDLE_HOOK_TOKEN}"))?; 689 + Ok(token) 690 + } 691 + 692 + fn allow_nix_build_group(path: &str) -> Result<()> { 693 + let Some(group) = 694 + Group::from_name(NIX_BUILD_GROUP).with_context(|| format!("lookup {NIX_BUILD_GROUP}"))? 695 + else { 696 + warn!( 697 + group = NIX_BUILD_GROUP, 698 + "nix build group not found; cache hook token remains root-only" 699 + ); 700 + return Ok(()); 701 + }; 702 + 703 + chown(path, None, Some(group.gid)).with_context(|| format!("chown {path} to {NIX_BUILD_GROUP}")) 704 + } 705 + 706 + fn read_hook_token() -> Result<String> { 707 + fs::read_to_string(SPINDLE_HOOK_TOKEN) 708 + .map(|token| token.trim().to_owned()) 709 + .with_context(|| format!("read {SPINDLE_HOOK_TOKEN}")) 710 + } 711 + 712 + fn random_token() -> Result<String> { 713 + let mut bytes = [0_u8; 32]; 714 + File::open("/dev/urandom") 715 + .context("open /dev/urandom")? 
716 + .read_exact(&mut bytes) 717 + .context("read /dev/urandom")?; 718 + 719 + let mut token = String::with_capacity(bytes.len() * 2); 720 + for byte in bytes { 721 + write!(&mut token, "{byte:02x}").unwrap(); 722 + } 723 + Ok(token) 724 + }
+27
shuttle/src/cache/read_proxy.rs
··· 1 + use crate::host_proxy::VsockTcpProxy; 2 + use anyhow::Result; 3 + 4 + const DEFAULT_CACHE_READ_PROXY_ADDR: &str = "127.0.0.1:10500"; 5 + const SHUTTLE_CACHE_READ_PROXY_ADDR_ENV: &str = "SHUTTLE_CACHE_READ_PROXY_ADDR"; 6 + 7 + pub struct ReadCacheProxy { 8 + inner: VsockTcpProxy, 9 + } 10 + 11 + impl ReadCacheProxy { 12 + pub async fn start(host_cid: u32, host_port: u32) -> Result<Option<Self>> { 13 + if host_port == 0 { 14 + return Ok(None); 15 + } 16 + 17 + let addr = std::env::var(SHUTTLE_CACHE_READ_PROXY_ADDR_ENV) 18 + .unwrap_or_else(|_| DEFAULT_CACHE_READ_PROXY_ADDR.to_owned()); 19 + 20 + let inner = VsockTcpProxy::start("read cache proxy", &addr, host_cid, host_port).await?; 21 + Ok(Some(Self { inner })) 22 + } 23 + 24 + pub fn url(&self) -> &str { 25 + self.inner.url() 26 + } 27 + }
+22
shuttle/src/cache/write_proxy.rs
··· 1 + use crate::host_proxy::VsockTcpProxy; 2 + use anyhow::Result; 3 + 4 + pub struct WriteCacheProxy { 5 + inner: VsockTcpProxy, 6 + } 7 + 8 + impl WriteCacheProxy { 9 + pub async fn start(host_cid: u32, host_port: u32) -> Result<Option<Self>> { 10 + if host_port == 0 { 11 + return Ok(None); 12 + } 13 + 14 + let inner = 15 + VsockTcpProxy::start("write cache proxy", "127.0.0.1:0", host_cid, host_port).await?; 16 + Ok(Some(Self { inner })) 17 + } 18 + 19 + pub fn url(&self) -> &str { 20 + self.inner.url() 21 + } 22 + }
+298
shuttle/src/command.rs
··· 1 + use anyhow::{Context, Result}; 2 + use nix::sys::signal::{Signal, kill}; 3 + use nix::unistd::{Gid, Pid, Uid, User, getgrouplist, setgid, setgroups, setuid}; 4 + use std::ffi::{CString, OsStr, OsString}; 5 + use std::io; 6 + use std::os::unix::process::ExitStatusExt; 7 + use std::path::PathBuf; 8 + use std::process::Stdio; 9 + use std::time::Duration; 10 + use tokio::io::{AsyncRead, AsyncReadExt}; 11 + use tokio::process::{Child, Command}; 12 + use tokio::sync::mpsc::{self, Receiver, Sender}; 13 + use tokio::task::JoinHandle; 14 + use tracing::warn; 15 + 16 + #[derive(Clone, Debug)] 17 + pub struct Spec { 18 + pub program: OsString, 19 + pub args: Vec<OsString>, 20 + pub env: Vec<(OsString, OsString)>, 21 + pub cwd: Option<PathBuf>, 22 + pub timeout: Option<Duration>, 23 + pub uid: Option<u32>, 24 + pub gid: Option<u32>, 25 + } 26 + 27 + impl Spec { 28 + pub fn new(program: impl Into<OsString>) -> Self { 29 + Self { 30 + program: program.into(), 31 + args: Vec::new(), 32 + env: Vec::new(), 33 + cwd: None, 34 + timeout: None, 35 + uid: None, 36 + gid: None, 37 + } 38 + } 39 + 40 + pub fn arg(mut self, arg: impl Into<OsString>) -> Self { 41 + self.args.push(arg.into()); 42 + self 43 + } 44 + 45 + pub fn args<I, S>(mut self, args: I) -> Self 46 + where 47 + I: IntoIterator<Item = S>, 48 + S: Into<OsString>, 49 + { 50 + self.args.extend(args.into_iter().map(Into::into)); 51 + self 52 + } 53 + 54 + pub fn envs<I, K, V>(mut self, env: I) -> Self 55 + where 56 + I: IntoIterator<Item = (K, V)>, 57 + K: Into<OsString>, 58 + V: Into<OsString>, 59 + { 60 + self.env.extend( 61 + env.into_iter() 62 + .map(|(key, value)| (key.into(), value.into())), 63 + ); 64 + self 65 + } 66 + 67 + pub fn cwd(mut self, cwd: impl Into<PathBuf>) -> Self { 68 + self.cwd = Some(cwd.into()); 69 + self 70 + } 71 + 72 + pub fn timeout(mut self, timeout: Duration) -> Self { 73 + self.timeout = Some(timeout); 74 + self 75 + } 76 + 77 + pub fn run_as(mut self, uid: u32, gid: u32) -> Self { 78 + 
self.uid = Some(uid); 79 + self.gid = Some(gid); 80 + self 81 + } 82 + } 83 + 84 + #[derive(Clone, Debug)] 85 + pub struct ExitResult { 86 + pub exit_code: i32, 87 + pub error: Option<String>, 88 + pub timed_out: bool, 89 + } 90 + 91 + #[derive(Clone, Debug)] 92 + pub struct CaptureOutput { 93 + pub exit: ExitResult, 94 + pub stdout: Vec<u8>, 95 + pub stderr: Vec<u8>, 96 + } 97 + 98 + impl CaptureOutput { 99 + pub fn success(&self) -> bool { 100 + self.exit.exit_code == 0 && self.exit.error.is_none() 101 + } 102 + 103 + pub fn combined_lossy(&self) -> String { 104 + let mut data = self.stdout.clone(); 105 + data.extend_from_slice(&self.stderr); 106 + String::from_utf8_lossy(&data).trim().to_owned() 107 + } 108 + } 109 + 110 + #[derive(Clone, Copy, Debug)] 111 + pub enum OutKind { 112 + Stdout, 113 + Stderr, 114 + } 115 + 116 + #[derive(Clone, Debug)] 117 + pub struct OutData { 118 + pub data: Vec<u8>, 119 + pub kind: OutKind, 120 + } 121 + 122 + pub struct StreamingCommand { 123 + events: Receiver<OutData>, 124 + exit: JoinHandle<Result<ExitResult>>, 125 + } 126 + 127 + impl StreamingCommand { 128 + pub fn into_parts(self) -> (Receiver<OutData>, JoinHandle<Result<ExitResult>>) { 129 + (self.events, self.exit) 130 + } 131 + } 132 + 133 + pub async fn run_capture(spec: Spec) -> Result<CaptureOutput> { 134 + let running = spawn_streaming(spec)?; 135 + let mut stdout = Vec::new(); 136 + let mut stderr = Vec::new(); 137 + let (mut events, exit_task) = running.into_parts(); 138 + 139 + while let Some(event) = events.recv().await { 140 + match event.kind { 141 + OutKind::Stdout => stdout.extend_from_slice(&event.data), 142 + OutKind::Stderr => stderr.extend_from_slice(&event.data), 143 + } 144 + } 145 + 146 + let exit = exit_task 147 + .await 148 + .unwrap_or_else(|error| Err(anyhow::anyhow!("command supervisor failed: {error}")))?; 149 + Ok(CaptureOutput { 150 + exit, 151 + stdout, 152 + stderr, 153 + }) 154 + } 155 + 156 + pub fn spawn_streaming(mut spec: Spec) -> 
Result<StreamingCommand> { 157 + let mut child = spawn(&mut spec)?; 158 + let stdout = child.stdout.take().context("stdout pipe missing")?; 159 + let stderr = child.stderr.take().context("stderr pipe missing")?; 160 + 161 + let (events_tx, events_rx) = mpsc::channel(64); 162 + let stdout_thread = spawn_reader(stdout, events_tx.clone(), OutKind::Stdout); 163 + let stderr_thread = spawn_reader(stderr, events_tx.clone(), OutKind::Stderr); 164 + drop(events_tx); 165 + 166 + let exit = tokio::spawn(async move { 167 + let exit = wait_child(&mut child, spec.timeout).await; 168 + 169 + // ensure all output is observed before exiting 170 + // this assumes children dont daemonize and hold onto the stdout/err 171 + stdout_thread.await.context("stdout reader task failed")?; 172 + stderr_thread.await.context("stderr reader task failed")?; 173 + 174 + Ok(exit) 175 + }); 176 + 177 + Ok(StreamingCommand { 178 + events: events_rx, 179 + exit, 180 + }) 181 + } 182 + 183 + fn spawn(spec: &mut Spec) -> Result<Child> { 184 + let mut cmd = Command::new(&spec.program); 185 + cmd.args(&spec.args) 186 + .envs(spec.env.iter().map(|(key, value)| (key, value))) 187 + .stdout(Stdio::piped()) 188 + .stderr(Stdio::piped()); 189 + 190 + if let Some(cwd) = &spec.cwd { 191 + cmd.current_dir(cwd); 192 + } 193 + 194 + // don't use rust's .uid() / .gid() methods here because they clear 195 + // supplemantary groups, which means for example adding a user to "docker" 196 + // group won't actually let it access the sock. 
197 + // https://github.com/rust-lang/rust/issues/90747 198 + if let (Some(uid), Some(gid)) = (spec.uid, spec.gid) { 199 + let username = User::from_uid(Uid::from_raw(uid)) 200 + .ok() 201 + .flatten() 202 + .map(|u| u.name) 203 + .with_context(|| format!("lookup passwd entry for uid {uid}"))?; 204 + let cname = CString::new(username) 205 + .with_context(|| format!("username for uid {uid} contained a null byte"))?; 206 + // resolve groups beforehand so we don't have to read /etc/group in the pre_exec 207 + let groups = 208 + getgrouplist(&cname, Gid::from_raw(gid)).context("resolve supplementary groups")?; 209 + // SAFETY: pre_exec runs between fork and execve in the child. 210 + // we only call async-signal-safe syscalls and we don't touch any 211 + // shared state, no allocator, no mutexes, no globals. 212 + unsafe { 213 + cmd.pre_exec(move || { 214 + setgroups(&groups).map_err(io::Error::from)?; 215 + setgid(Gid::from_raw(gid)).map_err(io::Error::from)?; 216 + setuid(Uid::from_raw(uid)).map_err(io::Error::from)?; 217 + Ok(()) 218 + }); 219 + } 220 + } 221 + 222 + // allow us to kill this whole process tree on deadline 223 + cmd.process_group(0); 224 + 225 + cmd.spawn() 226 + .with_context(|| format!("spawn {}", display_os(&spec.program))) 227 + } 228 + 229 + async fn wait_child(child: &mut Child, timeout: Option<Duration>) -> ExitResult { 230 + let wait = child.wait(); 231 + let status = match timeout { 232 + Some(timeout) => match tokio::time::timeout(timeout, wait).await { 233 + Ok(status) => status, 234 + Err(_) => { 235 + if let Some(pid) = child.id() 236 + && let Err(error) = kill(Pid::from_raw(-(pid as i32)), Signal::SIGKILL) 237 + { 238 + warn!(pid, %error, "failed to kill process group"); 239 + } 240 + let _ = child.wait().await; 241 + return ExitResult { 242 + exit_code: 124, 243 + error: Some("command timed out".to_owned()), 244 + timed_out: true, 245 + }; 246 + } 247 + }, 248 + None => wait.await, 249 + }; 250 + 251 + match status { 252 + Ok(status) 
=> ExitResult { 253 + exit_code: status 254 + .code() 255 + .or_else(|| status.signal().map(|signal| 128 + signal)) 256 + .unwrap_or(1), 257 + error: None, 258 + timed_out: false, 259 + }, 260 + Err(error) => ExitResult { 261 + exit_code: 1, 262 + error: Some(error.to_string()), 263 + timed_out: false, 264 + }, 265 + } 266 + } 267 + 268 + fn spawn_reader( 269 + mut reader: impl AsyncRead + Unpin + Send + 'static, 270 + events: Sender<OutData>, 271 + kind: OutKind, 272 + ) -> JoinHandle<()> { 273 + tokio::spawn(async move { 274 + let mut buf = [0_u8; 32 * 1024]; 275 + loop { 276 + match reader.read(&mut buf).await { 277 + Ok(0) => return, 278 + Ok(n) => { 279 + let event = OutData { 280 + data: buf[..n].to_vec(), 281 + kind, 282 + }; 283 + if events.send(event).await.is_err() { 284 + return; 285 + } 286 + } 287 + Err(error) => { 288 + warn!(%error, "failed to read command stream"); 289 + return; 290 + } 291 + } 292 + } 293 + }) 294 + } 295 + 296 + fn display_os(value: &OsStr) -> String { 297 + value.to_string_lossy().into_owned() 298 + }
+255
shuttle/src/dns_proxy.rs
··· 1 + use anyhow::{Context, Result}; 2 + use std::io; 3 + use std::net::SocketAddr; 4 + use std::sync::Arc; 5 + use std::time::Duration; 6 + use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; 7 + use tokio::net::{TcpListener, TcpStream, UdpSocket}; 8 + use tokio::task::{JoinError, JoinHandle, JoinSet}; 9 + use tokio_vsock::{VsockAddr, VsockStream}; 10 + use tracing::{info, warn}; 11 + 12 + const DEFAULT_DNS_PROXY_ADDR: &str = "127.0.0.1:53"; 13 + const SHUTTLE_DNS_PROXY_ADDR_ENV: &str = "SHUTTLE_DNS_PROXY_ADDR"; 14 + 15 + const MAX_DNS_MESSAGE_BYTES: usize = u16::MAX as usize; 16 + const DNS_IO_TIMEOUT: Duration = Duration::from_secs(10); 17 + const DNS_TCP_IDLE_TIMEOUT: Duration = Duration::from_secs(120); 18 + 19 + // implements a proxy that sends dns requests to the spindle and 20 + // lets the spindle resolve any queries, and streams the response back. 21 + // 22 + // we use this because the way spindle isolates QEMU VMs is unreliable 23 + // to rely on. if we unblock the blackholed-routes for private nameservers 24 + // like slirp one, we risk leaking internal DNS zones, even if the guest 25 + // can't connect to them. there is also potentially DNS rebinding issues. 26 + // and other slirp4netns quirks... 27 + // 28 + // this way we also get to filter the DNS queries very easily, so we can 29 + // make sure we remove everything that would leak a host information. 
30 + pub struct DnsProxy { 31 + handles: Vec<JoinHandle<()>>, 32 + } 33 + 34 + #[derive(Clone)] 35 + struct HostDnsClient { 36 + host_cid: u32, 37 + host_port: u32, 38 + } 39 + 40 + impl DnsProxy { 41 + pub async fn start(host_cid: u32, host_port: u32) -> Result<Option<Self>> { 42 + if host_port == 0 { 43 + return Ok(None); 44 + } 45 + 46 + let addr = std::env::var(SHUTTLE_DNS_PROXY_ADDR_ENV) 47 + .unwrap_or_else(|_| DEFAULT_DNS_PROXY_ADDR.to_owned()); 48 + 49 + let udp = Arc::new( 50 + UdpSocket::bind(&addr) 51 + .await 52 + .with_context(|| format!("bind dns udp listener {addr}"))?, 53 + ); 54 + 55 + let tcp = TcpListener::bind(&addr) 56 + .await 57 + .with_context(|| format!("bind dns tcp listener {addr}"))?; 58 + 59 + let host = HostDnsClient { 60 + host_cid, 61 + host_port, 62 + }; 63 + 64 + let handles = vec![ 65 + tokio::spawn(udp_loop(udp, host.clone())), 66 + tokio::spawn(tcp_loop(tcp, host)), 67 + ]; 68 + 69 + info!(%addr, host_cid, host_port, "dns proxy ready"); 70 + Ok(Some(Self { handles })) 71 + } 72 + } 73 + 74 + impl Drop for DnsProxy { 75 + fn drop(&mut self) { 76 + for handle in self.handles.drain(..) { 77 + handle.abort(); 78 + } 79 + } 80 + } 81 + 82 + impl HostDnsClient { 83 + async fn query(&self, query: Vec<u8>) -> Result<Vec<u8>> { 84 + match self.query_once(&query).await { 85 + Ok(response) => Ok(response), 86 + Err(first_error) => self.query_once(&query).await.with_context(|| { 87 + format!("dns host query failed after retry; first error: {first_error:#}") 88 + }), 89 + } 90 + } 91 + 92 + async fn query_once(&self, query: &[u8]) -> Result<Vec<u8>> { 93 + let addr = VsockAddr::new(self.host_cid, self.host_port); 94 + 95 + let mut host = tokio::time::timeout(DNS_IO_TIMEOUT, VsockStream::connect(addr)) 96 + .await 97 + .context("dns host connect timed out")? 
98 + .with_context(|| { 99 + format!( 100 + "dial host dns proxy cid={} port={}", 101 + self.host_cid, self.host_port 102 + ) 103 + })?; 104 + 105 + tokio::time::timeout(DNS_IO_TIMEOUT, async { 106 + write_dns_packet(&mut host, query) 107 + .await 108 + .context("write dns query to host")?; 109 + 110 + read_dns_packet(&mut host) 111 + .await 112 + .context("read dns response from host")? 113 + .context("host dns proxy closed without response") 114 + }) 115 + .await 116 + .context("dns host query timed out")? 117 + } 118 + } 119 + 120 + async fn udp_loop(socket: Arc<UdpSocket>, host: HostDnsClient) { 121 + let mut buf = vec![0; MAX_DNS_MESSAGE_BYTES]; 122 + let mut tasks = JoinSet::new(); 123 + 124 + loop { 125 + tokio::select! { 126 + received = socket.recv_from(&mut buf) => match received { 127 + Ok((len, peer)) => { 128 + let query = buf[..len].to_vec(); 129 + let socket = socket.clone(); 130 + let host = host.clone(); 131 + 132 + tasks.spawn(async move { 133 + if let Err(error) = handle_udp_query(socket, peer, query, host).await { 134 + warn!(%peer, %error, "dns udp query failed"); 135 + } 136 + }); 137 + } 138 + Err(error) => warn!(%error, "dns udp recv failed"), 139 + }, 140 + 141 + Some(result) = tasks.join_next(), if !tasks.is_empty() => { 142 + log_dns_task_result(result); 143 + } 144 + } 145 + } 146 + } 147 + 148 + async fn handle_udp_query( 149 + socket: Arc<UdpSocket>, 150 + peer: SocketAddr, 151 + query: Vec<u8>, 152 + host: HostDnsClient, 153 + ) -> Result<()> { 154 + let response = host.query(query).await?; 155 + 156 + socket 157 + .send_to(&response, peer) 158 + .await 159 + .context("send dns udp response")?; 160 + 161 + Ok(()) 162 + } 163 + 164 + async fn tcp_loop(listener: TcpListener, host: HostDnsClient) { 165 + let mut tasks = JoinSet::new(); 166 + 167 + loop { 168 + tokio::select! 
{ 169 + accepted = listener.accept() => match accepted { 170 + Ok((conn, peer)) => { 171 + let host = host.clone(); 172 + 173 + tasks.spawn(async move { 174 + if let Err(error) = handle_tcp_conn(conn, host).await { 175 + warn!(%peer, %error, "dns tcp connection failed"); 176 + } 177 + }); 178 + } 179 + Err(error) => warn!(%error, "dns tcp accept failed"), 180 + }, 181 + 182 + Some(result) = tasks.join_next(), if !tasks.is_empty() => { 183 + log_dns_task_result(result); 184 + } 185 + } 186 + } 187 + } 188 + 189 + async fn handle_tcp_conn(mut tcp: TcpStream, host: HostDnsClient) -> Result<()> { 190 + loop { 191 + let query = tokio::time::timeout(DNS_TCP_IDLE_TIMEOUT, read_dns_packet(&mut tcp)) 192 + .await 193 + .context("dns tcp idle timeout")? 194 + .context("read dns tcp query")?; 195 + 196 + let Some(query) = query else { 197 + return Ok(()); 198 + }; 199 + 200 + let response = host.query(query).await?; 201 + 202 + write_dns_packet(&mut tcp, &response) 203 + .await 204 + .context("write dns tcp response")?; 205 + } 206 + } 207 + 208 + async fn read_dns_packet<R>(reader: &mut R) -> io::Result<Option<Vec<u8>>> 209 + where 210 + R: AsyncRead + Unpin, 211 + { 212 + let mut len_buf = [0; 2]; 213 + 214 + match reader.read_exact(&mut len_buf).await { 215 + Ok(_) => {} 216 + Err(error) if error.kind() == io::ErrorKind::UnexpectedEof => return Ok(None), 217 + Err(error) => return Err(error), 218 + } 219 + 220 + let len = u16::from_be_bytes(len_buf) as usize; 221 + if len == 0 { 222 + return Err(io::Error::new( 223 + io::ErrorKind::InvalidData, 224 + "empty dns packet", 225 + )); 226 + } 227 + 228 + let mut packet = vec![0; len]; 229 + reader.read_exact(&mut packet).await?; 230 + Ok(Some(packet)) 231 + } 232 + 233 + async fn write_dns_packet<W>(writer: &mut W, packet: &[u8]) -> io::Result<()> 234 + where 235 + W: AsyncWrite + Unpin, 236 + { 237 + if packet.is_empty() || packet.len() > MAX_DNS_MESSAGE_BYTES { 238 + return Err(io::Error::new( 239 + 
io::ErrorKind::InvalidData, 240 + format!("invalid dns packet size {}", packet.len()), 241 + )); 242 + } 243 + 244 + writer 245 + .write_all(&(packet.len() as u16).to_be_bytes()) 246 + .await?; 247 + writer.write_all(packet).await?; 248 + writer.flush().await 249 + } 250 + 251 + fn log_dns_task_result(result: Result<(), JoinError>) { 252 + if let Err(error) = result { 253 + warn!(%error, "dns proxy task failed"); 254 + } 255 + }
+188
shuttle/src/exec.rs
··· 1 + use crate::command::{self, OutKind, Spec}; 2 + use crate::protocol::{self, Message, v1}; 3 + use nix::unistd::{Group, User}; 4 + use std::ffi::OsString; 5 + use std::time::Duration; 6 + use tokio::sync::mpsc::Sender; 7 + use tracing::info; 8 + 9 + const DEFAULT_USER: &str = "spindle-workflow"; 10 + 11 + pub async fn run(id: String, req: v1::ExecStart, out: Sender<Message>) { 12 + let send_exit = async |exit_code: i32, error: Option<String>, timed_out: bool| { 13 + let msg = Message { 14 + id: id.clone(), 15 + exec_exit: Some(v1::ExecExit { 16 + exit_code, 17 + error: protocol::error_or_empty(error), 18 + timed_out, 19 + }), 20 + ..Default::default() 21 + }; 22 + let _ = out.send(msg).await; 23 + }; 24 + 25 + if req.argv.is_empty() { 26 + send_exit(127, Some("missing argv".to_owned()), false).await; 27 + return; 28 + } 29 + 30 + let user = if req.user.is_empty() { 31 + DEFAULT_USER 32 + } else { 33 + req.user.as_str() 34 + }; 35 + let run_as = match resolve_user(user) { 36 + Ok(run_as) => run_as, 37 + Err(err) => { 38 + send_exit(127, Some(err), false).await; 39 + return; 40 + } 41 + }; 42 + 43 + let mut spec = Spec::new(req.argv[0].clone()) 44 + .args(req.argv[1..].iter().cloned()) 45 + .envs(parse_env(&req.env)) 46 + .run_as(run_as.uid, run_as.gid); 47 + if !req.cwd.is_empty() { 48 + spec = spec.cwd(req.cwd.clone()); 49 + } 50 + let timeout = 51 + (req.timeout_seconds > 0).then(|| Duration::from_secs(u64::from(req.timeout_seconds))); 52 + if let Some(timeout) = timeout { 53 + spec = spec.timeout(timeout); 54 + } 55 + 56 + info!( 57 + %id, 58 + user = %run_as.name, 59 + uid = run_as.uid, 60 + gid = run_as.gid, 61 + argv = ?req.argv, 62 + cwd = ?req.cwd, 63 + "starting exec" 64 + ); 65 + 66 + let cmd = match command::spawn_streaming(spec) { 67 + Ok(cmd) => cmd, 68 + Err(err) => { 69 + send_exit(127, Some(err.to_string()), false).await; 70 + return; 71 + } 72 + }; 73 + let (mut events, exit_task) = cmd.into_parts(); 74 + while let Some(event) = 
events.recv().await { 75 + let data = String::from_utf8_lossy(&event.data).into_owned(); 76 + let output = match event.kind { 77 + OutKind::Stdout => Message { 78 + id: id.clone(), 79 + exec_stdout: Some(v1::ExecStdout { data }), 80 + ..Default::default() 81 + }, 82 + OutKind::Stderr => Message { 83 + id: id.clone(), 84 + exec_stderr: Some(v1::ExecStderr { data }), 85 + ..Default::default() 86 + }, 87 + }; 88 + let _ = out.send(output).await; 89 + } 90 + let exit = match exit_task 91 + .await 92 + .unwrap_or_else(|error| Err(anyhow::anyhow!("command supervisor failed: {error}"))) 93 + { 94 + Ok(exit) => exit, 95 + Err(err) => { 96 + send_exit(127, Some(err.to_string()), false).await; 97 + return; 98 + } 99 + }; 100 + 101 + send_exit(exit.exit_code, exit.error, exit.timed_out).await 102 + } 103 + 104 + #[derive(Clone, Debug)] 105 + struct ResolvedUser { 106 + name: String, 107 + uid: u32, 108 + gid: u32, 109 + } 110 + 111 + fn resolve_user(spec: &str) -> Result<ResolvedUser, String> { 112 + let spec = spec.trim(); 113 + if spec.is_empty() { 114 + return resolve_user(DEFAULT_USER); 115 + } 116 + 117 + let (user_part, group_part) = spec 118 + .split_once(':') 119 + .map(|(user, group)| (user, Some(group))) 120 + .unwrap_or((spec, None)); 121 + 122 + let mut user = lookup_user(user_part)?; 123 + if let Some(group) = group_part.filter(|group| !group.is_empty()) { 124 + user.gid = lookup_group(group)?; 125 + } 126 + 127 + if user.uid == 0 || user.gid == 0 { 128 + return Err(format!("refusing to run exec as privileged user {spec:?}")); 129 + } 130 + 131 + Ok(user) 132 + } 133 + 134 + fn lookup_user(name: &str) -> Result<ResolvedUser, String> { 135 + match User::from_name(name) { 136 + Ok(Some(user)) => Ok(ResolvedUser { 137 + name: name.to_owned(), 138 + uid: user.uid.as_raw(), 139 + gid: user.gid.as_raw(), 140 + }), 141 + Ok(None) => { 142 + let uid = name 143 + .parse::<u32>() 144 + .map_err(|_| format!("workflow user {name:?} was not found"))?; 145 + Ok(ResolvedUser { 
146 + name: name.to_owned(), 147 + uid, 148 + gid: uid, 149 + }) 150 + } 151 + Err(error) => Err(format!("lookup workflow user {name:?}: {error}")), 152 + } 153 + } 154 + 155 + fn lookup_group(name: &str) -> Result<u32, String> { 156 + match Group::from_name(name) { 157 + Ok(Some(group)) => Ok(group.gid.as_raw()), 158 + Ok(None) => name 159 + .parse::<u32>() 160 + .map_err(|_| format!("workflow group {name:?} was not found")), 161 + Err(error) => Err(format!("lookup workflow group {name:?}: {error}")), 162 + } 163 + } 164 + 165 + fn parse_env(values: &[String]) -> Vec<(OsString, OsString)> { 166 + values 167 + .iter() 168 + .filter_map(|value| value.split_once('=')) 169 + .map(|(key, value)| (OsString::from(key), OsString::from(value))) 170 + .collect() 171 + } 172 + 173 + #[cfg(test)] 174 + mod tests { 175 + use super::*; 176 + 177 + #[test] 178 + fn refuses_root_exec_user() { 179 + let err = resolve_user("root").unwrap_err(); 180 + assert!(err.contains("refusing to run exec as privileged user")); 181 + } 182 + 183 + #[test] 184 + fn refuses_root_exec_group() { 185 + let err = resolve_user("65534:0").unwrap_err(); 186 + assert!(err.contains("refusing to run exec as privileged user")); 187 + } 188 + }
shuttle/src/gen/file_descriptor_set.bin

This is a binary file and will not be displayed.

+146
shuttle/src/gen/spindle/agent/v1/spindle.agent.v1.rs
// @generated
// This file is @generated by prost-build.
//
// NOTE(review): comments added here are lost when the file is regenerated;
// anything worth keeping should also go into the .proto source.

/// Version and identity fields carried by a `hello` message.
/// Presumably the agent's greeting to the host — confirm against the .proto.
#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
pub struct Hello {
    #[prost(uint32, tag = "1")]
    pub protocol_version: u32,
    #[prost(string, tag = "2")]
    pub agent_version: ::prost::alloc::string::String,
    #[prost(string, tag = "3")]
    pub boot_id: ::prost::alloc::string::String,
    #[prost(string, tag = "4")]
    pub nix_version: ::prost::alloc::string::String,
}
/// Per-job setup: job id, trusted cache public keys and the ports of the
/// cache read, cache upload and DNS proxies.
#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
pub struct Init {
    #[prost(string, tag = "1")]
    pub job_id: ::prost::alloc::string::String,
    #[prost(string, repeated, tag = "2")]
    pub cache_trusted_public_keys: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
    #[prost(uint32, tag = "3")]
    pub cache_read_proxy_port: u32,
    #[prost(uint32, tag = "4")]
    pub cache_upload_proxy_port: u32,
    #[prost(uint32, tag = "5")]
    pub dns_proxy_port: u32,
}
/// Request to run a command: argv, environment entries, working directory,
/// user and a timeout in seconds.
#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
pub struct ExecStart {
    #[prost(string, repeated, tag = "1")]
    pub argv: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
    #[prost(string, repeated, tag = "2")]
    pub env: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
    #[prost(string, tag = "3")]
    pub cwd: ::prost::alloc::string::String,
    #[prost(string, tag = "4")]
    pub user: ::prost::alloc::string::String,
    #[prost(uint32, tag = "5")]
    pub timeout_seconds: u32,
}
/// A piece of a command's standard output, as a string.
#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
pub struct ExecStdout {
    #[prost(string, tag = "1")]
    pub data: ::prost::alloc::string::String,
}
/// A piece of a command's standard error, as a string.
#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
pub struct ExecStderr {
    #[prost(string, tag = "1")]
    pub data: ::prost::alloc::string::String,
}
/// Final status of a command: exit code, error text and a timeout flag.
#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
pub struct ExecExit {
    #[prost(int32, tag = "1")]
    pub exit_code: i32,
    #[prost(string, tag = "2")]
    pub error: ::prost::alloc::string::String,
    /// set when the guest killed the step on its own timeout timer, so the host
    /// can classify it as a timeout rather than inferring failure from exit_code.
    #[prost(bool, tag = "3")]
    pub timed_out: bool,
}
/// Request to activate a configuration, identified by `config_key`.
/// NOTE(review): the meaning of the individual fields is not visible from
/// this file — see the .proto and the agent's activation code.
#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
pub struct ActivateConfig {
    #[prost(string, tag = "1")]
    pub config_key: ::prost::alloc::string::String,
    #[prost(string, tag = "2")]
    pub base_config_hash: ::prost::alloc::string::String,
    #[prost(string, tag = "3")]
    pub user_config: ::prost::alloc::string::String,
    #[prost(string, tag = "4")]
    pub toplevel: ::prost::alloc::string::String,
    #[prost(uint32, tag = "5")]
    pub timeout_seconds: u32,
}
/// Outcome of an `ActivateConfig`; `error` is presumably empty on success —
/// TODO confirm.
#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
pub struct ActivateConfigResult {
    #[prost(string, tag = "1")]
    pub config_key: ::prost::alloc::string::String,
    #[prost(string, tag = "2")]
    pub toplevel: ::prost::alloc::string::String,
    #[prost(string, tag = "3")]
    pub error: ::prost::alloc::string::String,
}
/// A list of built paths together with the reason they are being reported.
#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
pub struct BuiltPaths {
    #[prost(string, repeated, tag = "1")]
    pub paths: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
    #[prost(string, tag = "2")]
    pub reason: ::prost::alloc::string::String,
}
/// Request to drain the cache, with a timeout in seconds.
#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)]
pub struct CacheDrain {
    #[prost(uint32, tag = "1")]
    pub timeout_seconds: u32,
}
/// Outcome of a `CacheDrain`: error text plus queued/active/uploaded/failed
/// counters.
#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
pub struct CacheDrainResult {
    #[prost(string, tag = "1")]
    pub error: ::prost::alloc::string::String,
    #[prost(uint32, tag = "2")]
    pub cache_queued: u32,
    #[prost(uint32, tag = "3")]
    pub cache_active: u32,
    #[prost(uint32, tag = "4")]
    pub cache_uploaded: u32,
    #[prost(uint32, tag = "5")]
    pub cache_failed: u32,
}
/// Power-off request; carries no fields.
#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)]
pub struct Poweroff {}
/// Outcome of a `Poweroff`.
#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
pub struct PoweroffResult {
    #[prost(string, tag = "1")]
    pub error: ::prost::alloc::string::String,
}
/// Envelope for every message: an `id` plus one optional field per message
/// kind. Presumably exactly one of the optional fields is set per message —
/// TODO confirm against the .proto.
#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
pub struct Message {
    #[prost(string, tag = "1")]
    pub id: ::prost::alloc::string::String,
    #[prost(message, optional, tag = "2")]
    pub hello: ::core::option::Option<Hello>,
    #[prost(message, optional, tag = "3")]
    pub init: ::core::option::Option<Init>,
    #[prost(message, optional, tag = "4")]
    pub exec_start: ::core::option::Option<ExecStart>,
    #[prost(message, optional, tag = "5")]
    pub exec_stdout: ::core::option::Option<ExecStdout>,
    #[prost(message, optional, tag = "6")]
    pub exec_stderr: ::core::option::Option<ExecStderr>,
    #[prost(message, optional, tag = "7")]
    pub exec_exit: ::core::option::Option<ExecExit>,
    #[prost(message, optional, tag = "8")]
    pub activate_config: ::core::option::Option<ActivateConfig>,
    #[prost(message, optional, tag = "9")]
    pub activate_config_result: ::core::option::Option<ActivateConfigResult>,
    #[prost(message, optional, tag = "10")]
    pub built_paths: ::core::option::Option<BuiltPaths>,
    #[prost(message, optional, tag = "11")]
    pub cache_drain: ::core::option::Option<CacheDrain>,
    #[prost(message, optional, tag = "12")]
    pub cache_drain_result: ::core::option::Option<CacheDrainResult>,
    #[prost(message, optional, tag = "13")]
    pub poweroff: ::core::option::Option<Poweroff>,
    #[prost(message, optional, tag = "14")]
    pub poweroff_result: ::core::option::Option<PoweroffResult>,
}
// @@protoc_insertion_point(module)
+92
shuttle/src/host_proxy.rs
··· 1 + use anyhow::{Context, Result}; 2 + use tokio::net::{TcpListener, TcpStream}; 3 + use tokio::task::{JoinError, JoinHandle, JoinSet}; 4 + use tokio_vsock::{VsockAddr, VsockStream}; 5 + use tracing::{info, warn}; 6 + 7 + // this implements a vsock <-> tcp proxy for communicating with spindle 8 + pub struct VsockTcpProxy { 9 + url: String, 10 + handle: JoinHandle<()>, 11 + } 12 + 13 + impl VsockTcpProxy { 14 + pub async fn start( 15 + name: &'static str, 16 + bind_addr: &str, 17 + host_cid: u32, 18 + host_port: u32, 19 + ) -> Result<Self> { 20 + if host_port == 0 { 21 + anyhow::bail!("port 0 cant be requested"); 22 + } 23 + 24 + let listener = TcpListener::bind(bind_addr) 25 + .await 26 + .with_context(|| format!("bind {name} listener {bind_addr}"))?; 27 + let local_addr = listener 28 + .local_addr() 29 + .with_context(|| format!("{name} local address"))?; 30 + let url = format!("http://{local_addr}"); 31 + 32 + let handle = tokio::spawn(async move { 33 + accept_loop(name, listener, host_cid, host_port).await; 34 + }); 35 + 36 + info!(%url, host_cid, host_port, "{name} ready"); 37 + Ok(Self { url, handle }) 38 + } 39 + 40 + pub fn url(&self) -> &str { 41 + &self.url 42 + } 43 + } 44 + 45 + impl Drop for VsockTcpProxy { 46 + fn drop(&mut self) { 47 + self.handle.abort(); 48 + } 49 + } 50 + 51 + async fn accept_loop(name: &'static str, listener: TcpListener, host_cid: u32, host_port: u32) { 52 + let mut tasks = JoinSet::new(); 53 + loop { 54 + tokio::select! 
{ 55 + accepted = listener.accept() => match accepted { 56 + Ok((conn, _addr)) => { 57 + tasks.spawn(async move { 58 + if let Err(error) = proxy_conn(name, conn, host_cid, host_port).await { 59 + warn!(%error, "{name} connection failed"); 60 + } 61 + }); 62 + } 63 + Err(error) => warn!(%error, "{name} accept failed"), 64 + }, 65 + Some(result) = tasks.join_next(), if !tasks.is_empty() => { 66 + log_proxy_task_result(result); 67 + } 68 + } 69 + } 70 + } 71 + 72 + fn log_proxy_task_result(result: Result<(), JoinError>) { 73 + if let Err(error) = result { 74 + warn!(%error, "proxy task failed"); 75 + } 76 + } 77 + 78 + async fn proxy_conn( 79 + name: &'static str, 80 + mut tcp: TcpStream, 81 + host_cid: u32, 82 + host_port: u32, 83 + ) -> Result<()> { 84 + let mut host = VsockStream::connect(VsockAddr::new(host_cid, host_port)) 85 + .await 86 + .with_context(|| format!("dial host {name} cid={host_cid} port={host_port}"))?; 87 + 88 + tokio::io::copy_bidirectional(&mut tcp, &mut host) 89 + .await 90 + .context("proxy connection copy")?; 91 + Ok(()) 92 + }
+49
shuttle/src/logging.rs
··· 1 + use std::fs::{File, OpenOptions}; 2 + use std::io::{self, Write}; 3 + use tracing_subscriber::EnvFilter; 4 + use tracing_subscriber::fmt::MakeWriter; 5 + 6 + pub fn init() { 7 + let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")); 8 + tracing_subscriber::fmt() 9 + .with_env_filter(filter) 10 + .with_writer(ConsoleAndStderr) 11 + .init(); 12 + } 13 + 14 + #[derive(Clone, Copy, Debug)] 15 + struct ConsoleAndStderr; 16 + 17 + struct TeeWriter { 18 + stderr: io::Stderr, 19 + console: Option<File>, 20 + } 21 + 22 + impl<'a> MakeWriter<'a> for ConsoleAndStderr { 23 + type Writer = TeeWriter; 24 + 25 + fn make_writer(&'a self) -> Self::Writer { 26 + TeeWriter { 27 + stderr: io::stderr(), 28 + console: OpenOptions::new().write(true).open("/dev/console").ok(), 29 + } 30 + } 31 + } 32 + 33 + impl Write for TeeWriter { 34 + fn write(&mut self, buf: &[u8]) -> io::Result<usize> { 35 + self.stderr.write_all(buf)?; 36 + if let Some(console) = &mut self.console { 37 + console.write_all(buf)?; 38 + } 39 + Ok(buf.len()) 40 + } 41 + 42 + fn flush(&mut self) -> io::Result<()> { 43 + self.stderr.flush()?; 44 + if let Some(console) = &mut self.console { 45 + console.flush()?; 46 + } 47 + Ok(()) 48 + } 49 + }
+62
shuttle/src/main.rs
··· 1 + #![cfg(target_os = "linux")] 2 + 3 + mod activation; 4 + mod cache; 5 + mod command; 6 + mod dns_proxy; 7 + mod exec; 8 + mod host_proxy; 9 + mod logging; 10 + mod nix_config; 11 + mod protocol; 12 + mod session; 13 + 14 + use std::env; 15 + use std::time::Duration; 16 + use tracing::warn; 17 + 18 + #[macro_export] 19 + macro_rules! cfg { 20 + (@val $key:expr) => { 21 + std::env::var(concat!("SHUTTLE_", $key)) 22 + }; 23 + ($key:expr, $default:expr) => { 24 + cfg!(@val $key) 25 + .ok() 26 + .and_then(|s| s.parse().ok()) 27 + .unwrap_or($default.to_owned()) 28 + .into() 29 + }; 30 + } 31 + 32 + fn cmdline_param(key: &str) -> Option<String> { 33 + let cmdline = std::fs::read_to_string("/proc/cmdline").ok()?; 34 + cmdline 35 + .split_whitespace() 36 + .find_map(|tok| Some(tok.strip_prefix(key)?.strip_prefix('=')?.to_owned())) 37 + } 38 + 39 + #[tokio::main] 40 + async fn main() { 41 + logging::init(); 42 + 43 + let args: Vec<String> = env::args().collect(); 44 + if args.get(1).map(String::as_str) == Some("enqueue-built-paths") { 45 + cache::enqueue_built_paths(&args[2..]).await; 46 + return; 47 + } 48 + 49 + let port: u32 = cfg!(@val "VSOCK_PORT") 50 + .ok() 51 + .or_else(|| cmdline_param("shuttle.vsock_port")) 52 + .and_then(|s| s.parse().ok()) 53 + .unwrap_or(protocol::DEFAULT_PORT); 54 + let host_cid: u32 = cfg!("HOST_CID", tokio_vsock::VMADDR_CID_HOST); 55 + 56 + loop { 57 + if let Err(error) = session::run(host_cid, port).await { 58 + warn!(host_cid, port, %error, "agent session failed"); 59 + } 60 + tokio::time::sleep(Duration::from_secs(1)).await; 61 + } 62 + }
+223
shuttle/src/nix_config.rs
··· 1 + use crate::command::{self, Spec}; 2 + use crate::protocol::v1; 3 + use anyhow::{Context, Result}; 4 + use serde::{Deserialize, Serialize}; 5 + use std::collections::HashSet; 6 + use std::fmt::Write as _; 7 + use std::fs; 8 + use std::io::Write as _; 9 + use std::os::unix::fs::PermissionsExt; 10 + use std::path::{Path, PathBuf}; 11 + use std::time::Duration; 12 + use tempfile::Builder; 13 + use tracing::{info, warn}; 14 + 15 + pub const SPINDLE_RUN_DIR: &str = "/run/spindle"; 16 + pub const SPINDLE_NIX_CONFIG: &str = "/run/spindle/nix.conf"; 17 + pub const SPINDLE_CACHE_CONFIG: &str = "/run/spindle/cache.json"; 18 + pub const SYSTEMCTL_EXECUTABLE: &str = "/run/current-system/sw/bin/systemctl"; 19 + 20 + // nix lives in different places depending on the guest OS (NixOS system 21 + // profile vs. plain /usr/local on e.g. alpine) 22 + pub fn nix_executable() -> &'static str { 23 + static NIX: once_cell::sync::Lazy<&'static str> = once_cell::sync::Lazy::new(|| { 24 + let paths = [ 25 + "/run/current-system/sw/bin/nix", 26 + "/usr/local/bin/nix", 27 + "/usr/bin/nix", 28 + ]; 29 + for candidate in paths { 30 + if Path::new(candidate).exists() { 31 + return candidate; 32 + } 33 + } 34 + "/run/current-system/sw/bin/nix" 35 + }); 36 + &NIX 37 + } 38 + 39 + #[derive(Clone, Debug, Default, Deserialize, Serialize)] 40 + pub struct RuntimeCacheConfig { 41 + pub read_urls: Vec<String>, 42 + pub trusted_public_keys: Vec<String>, 43 + } 44 + 45 + // configures nix daemon with the configuration passed from host 46 + pub async fn configure(init: &v1::Init, read_proxy_url: &str) -> Result<RuntimeCacheConfig> { 47 + let read_urls = vec![read_proxy_url.to_owned()]; 48 + let cfg = RuntimeCacheConfig { 49 + read_urls, 50 + trusted_public_keys: clean_strings(&init.cache_trusted_public_keys), 51 + }; 52 + 53 + if cfg.read_urls.is_empty() && cfg.trusted_public_keys.is_empty() { 54 + remove_if_exists(SPINDLE_NIX_CONFIG)?; 55 + remove_if_exists(SPINDLE_CACHE_CONFIG)?; 56 + return 
Ok(cfg); 57 + } 58 + 59 + fs::create_dir_all(SPINDLE_RUN_DIR).with_context(|| format!("create {SPINDLE_RUN_DIR}"))?; 60 + 61 + let cache_json = serde_json::to_vec_pretty(&cfg)?; 62 + write_file_atomic(SPINDLE_CACHE_CONFIG, &cache_json, 0o600)?; 63 + 64 + let mut nix_conf = String::new(); 65 + if !cfg.read_urls.is_empty() { 66 + writeln!( 67 + &mut nix_conf, 68 + "extra-substituters = {}", 69 + cfg.read_urls.join(" ") 70 + ) 71 + .unwrap(); 72 + } 73 + if !cfg.trusted_public_keys.is_empty() { 74 + writeln!( 75 + &mut nix_conf, 76 + "extra-trusted-public-keys = {}", 77 + cfg.trusted_public_keys.join(" ") 78 + ) 79 + .unwrap(); 80 + } 81 + 82 + if nix_conf.is_empty() { 83 + remove_if_exists(SPINDLE_NIX_CONFIG)?; 84 + return Ok(cfg); 85 + } 86 + 87 + write_file_atomic(SPINDLE_NIX_CONFIG, nix_conf.as_bytes(), 0o644)?; 88 + restart_nix_daemon().await; 89 + info!( 90 + read_urls = ?cfg.read_urls, 91 + trusted_public_keys = cfg.trusted_public_keys.len(), 92 + "configured nix cache" 93 + ); 94 + 95 + Ok(cfg) 96 + } 97 + 98 + pub fn clean_strings(values: &[String]) -> Vec<String> { 99 + let mut seen = HashSet::new(); 100 + let mut out = Vec::with_capacity(values.len()); 101 + 102 + for value in values { 103 + let value = value.trim(); 104 + if value.is_empty() || !seen.insert(value.to_owned()) { 105 + continue; 106 + } 107 + out.push(value.to_owned()); 108 + } 109 + 110 + out 111 + } 112 + 113 + pub fn clean_store_paths(values: &[String]) -> Vec<String> { 114 + clean_strings(values) 115 + .into_iter() 116 + .filter(|value| value.starts_with("/nix/store/")) 117 + .collect() 118 + } 119 + 120 + pub async fn nix_version() -> String { 121 + let spec = Spec::new(nix_executable()) 122 + .arg("--version") 123 + .timeout(Duration::from_secs(1)); 124 + 125 + let Ok(output) = command::run_capture(spec).await else { 126 + return String::new(); 127 + }; 128 + if !output.success() { 129 + return String::new(); 130 + } 131 + 132 + String::from_utf8_lossy(&output.stdout).trim().to_owned() 
133 + } 134 + 135 + fn write_file_atomic(path: impl AsRef<Path>, data: &[u8], mode: u32) -> Result<()> { 136 + let path = path.as_ref(); 137 + let dir = path.parent().unwrap_or_else(|| Path::new(".")); 138 + let prefix = path 139 + .file_name() 140 + .and_then(|name| name.to_str()) 141 + .map(|name| format!(".{name}.tmp-")) 142 + .unwrap_or_else(|| ".tmp-".to_owned()); 143 + 144 + let mut tmp = Builder::new() 145 + .prefix(&prefix) 146 + .permissions(fs::Permissions::from_mode(mode)) 147 + .tempfile_in(dir) 148 + .with_context(|| format!("create temp file for {}", path.display()))?; 149 + 150 + // no separate sync here because we don't need to be crash-safe (this is an 151 + // ephemeral vm) only atomicity is needed 152 + tmp.write_all(data) 153 + .with_context(|| format!("write temp file for {}", path.display()))?; 154 + tmp.persist(path) 155 + .map(|_| ()) 156 + .map_err(|err| err.error) 157 + .with_context(|| format!("install {}", path.display())) 158 + } 159 + 160 + const NIX_DAEMON_SOCKET: &str = "/nix/var/nix/daemon-socket/socket"; 161 + 162 + async fn restart_nix_daemon() { 163 + let systemd = Path::new(SYSTEMCTL_EXECUTABLE).exists(); 164 + let spec = if systemd { 165 + Spec::new(SYSTEMCTL_EXECUTABLE) 166 + .args(["try-restart", "nix-daemon.service"]) 167 + .timeout(Duration::from_secs(5)) 168 + } else { 169 + // on non-systemd we can just kill the daemon and it should restart 170 + Spec::new("pkill") 171 + .args(["-f", "nix-daemon"]) 172 + .timeout(Duration::from_secs(5)) 173 + }; 174 + 175 + match command::run_capture(spec).await { 176 + Ok(output) if output.success() => { 177 + if !systemd { 178 + // init has to respawn the daemon before any step needs it 179 + wait_for_nix_daemon_socket(Duration::from_secs(5)).await; 180 + } 181 + } 182 + // pkill exits 1 when nothing matched, ie. 
no daemon to restart 183 + Ok(output) if !systemd && output.exit.exit_code == 1 => { 184 + info!("no nix-daemon running, skipping restart") 185 + } 186 + Ok(output) => warn!( 187 + exit_code = output.exit.exit_code, 188 + error = ?output.exit.error, 189 + output = %output.combined_lossy(), 190 + "nix-daemon restart failed" 191 + ), 192 + Err(error) => warn!(%error, "nix-daemon restart failed"), 193 + } 194 + } 195 + 196 + async fn wait_for_nix_daemon_socket(timeout: Duration) { 197 + let deadline = tokio::time::Instant::now() + timeout; 198 + loop { 199 + if tokio::net::UnixStream::connect(NIX_DAEMON_SOCKET) 200 + .await 201 + .is_ok() 202 + { 203 + return; 204 + } 205 + if tokio::time::Instant::now() >= deadline { 206 + warn!( 207 + socket = NIX_DAEMON_SOCKET, 208 + "nix-daemon did not come back after restart" 209 + ); 210 + return; 211 + } 212 + tokio::time::sleep(Duration::from_millis(100)).await; 213 + } 214 + } 215 + 216 + fn remove_if_exists(path: impl AsRef<Path>) -> Result<()> { 217 + let path: PathBuf = path.as_ref().to_owned(); 218 + match fs::remove_file(&path) { 219 + Ok(()) => Ok(()), 220 + Err(error) if error.kind() == std::io::ErrorKind::NotFound => Ok(()), 221 + Err(error) => Err(error).with_context(|| format!("remove {}", path.display())), 222 + } 223 + }
+212
shuttle/src/protocol.rs
use once_cell::sync::Lazy;
use prost::Message as ProstMessage;
use prost_reflect::DescriptorPool;
use std::io;
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};

/// Generated protobuf types for the agent protocol.
pub mod v1 {
    include!("gen/spindle/agent/v1/spindle.agent.v1.rs");
}

pub use v1::Message;

/// Descriptor pool decoded from the embedded file descriptor set on first
/// use; panics if the embedded bytes do not decode.
pub static DESCRIPTOR_POOL: Lazy<DescriptorPool> = Lazy::new(|| {
    let bytes = include_bytes!("gen/file_descriptor_set.bin");
    DescriptorPool::decode(&bytes[..]).unwrap()
});

// implements `ReflectMessage` for each listed generated type by looking its
// descriptor up as `spindle.agent.v1.<TypeName>`; panics if the name is
// missing from the pool
macro_rules! impl_reflect {
    ($($t:ident),* $(,)?) => {
        $(
            impl prost_reflect::ReflectMessage for v1::$t {
                fn descriptor(&self) -> prost_reflect::MessageDescriptor {
                    DESCRIPTOR_POOL
                        .get_message_by_name(concat!("spindle.agent.v1.", stringify!($t)))
                        .unwrap()
                }
            }
        )*
    };
}

impl_reflect!(
    Hello,
    Init,
    ExecStart,
    ExecStdout,
    ExecStderr,
    ExecExit,
    ActivateConfig,
    ActivateConfigResult,
    BuiltPaths,
    CacheDrain,
    CacheDrainResult,
    Poweroff,
    PoweroffResult,
    Message,
);

pub const PROTOCOL_VERSION: u32 = 1;
pub const DEFAULT_PORT: u32 = 10240;
/// Upper bound on the encoded size of one message, enforced on both write
/// and read.
pub const MAX_MESSAGE_BYTES: usize = 1024 * 1024;

// dispatches on whichever optional payload field of a message is set: the
// listed fields are tested in order and the body of the first one that is
// `Some` is evaluated and wrapped in `Some`; the result is `None` when none
// of the listed fields is set. the `ref` form borrows the payload, the plain
// form moves it out of the message.
#[macro_export]
macro_rules! on_payload {
    (ref $msg:expr, { $( $field:ident => $body:expr ),* $(,)? }) => {
        #[allow(unused_variables)]
        $(if let Some(ref $field) = $msg.$field { Some($body) } else)* { None }
    };
    ($msg:expr, { $( $field:ident => $body:expr ),* $(,)? }) => {
        $(if let Some($field) = $msg.$field { Some($body) } else )* { None }
    };
}

/// Returns the name of the payload field that is set on `msg`.
///
/// Panics (via `unreachable!`) when no payload field is set, so it must only
/// be called on messages that passed validation.
pub fn kind(msg: &Message) -> &'static str {
    // todo(dawn): maybe eventually we should have a custom protoc plugin for
    // generating an enum, right now not worth it, when we have more needs for
    // it imo we can consider it again
    on_payload!(ref msg, {
        hello => "hello",
        init => "init",
        exec_start => "exec_start",
        exec_stdout => "exec_stdout",
        exec_stderr => "exec_stderr",
        exec_exit => "exec_exit",
        activate_config => "activate_config",
        activate_config_result => "activate_config_result",
        built_paths => "built_paths",
        cache_drain => "cache_drain",
        cache_drain_result => "cache_drain_result",
        poweroff => "poweroff",
        poweroff_result => "poweroff_result",
    })
    .unwrap_or_else(|| unreachable!("validated message has no payload"))
}

/// Maps `None` and `Some("")` to an empty string, otherwise returns the
/// error text unchanged.
pub fn error_or_empty(error: Option<String>) -> String {
    error.filter(|error| !error.is_empty()).unwrap_or_default()
}

/// Validates `msg`, then writes it as one frame: a 4-byte big-endian length
/// followed by the protobuf encoding, and flushes the writer.
///
/// Returns `InvalidData` when validation or encoding fails or the encoding
/// exceeds [`MAX_MESSAGE_BYTES`]; nothing is written in those cases.
pub async fn write_message<W: AsyncWrite + Unpin>(writer: &mut W, msg: &Message) -> io::Result<()> {
    if let Err(err) = prost_protovalidate::validate(msg) {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            format!("validate agent message: {err}"),
        ));
    }

    let mut data = Vec::new();
    msg.encode(&mut data)
        .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?;
    if data.len() > MAX_MESSAGE_BYTES {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            format!("agent message exceeded {MAX_MESSAGE_BYTES} bytes"),
        ));
    }

    // the cast cannot truncate: the length was just checked against
    // MAX_MESSAGE_BYTES
    writer.write_all(&(data.len() as u32).to_be_bytes()).await?;
    writer.write_all(&data).await?;
    writer.flush().await
}

/// Reads one length-prefixed frame and decodes and validates it.
///
/// Returns `Ok(None)` when the stream ends before any header byte arrives.
/// Returns `InvalidData` when the announced size exceeds
/// [`MAX_MESSAGE_BYTES`] or the body fails to decode or validate.
///
/// NOTE(review): the body is read with `read_exact`, which tokio documents
/// as not cancellation safe, so dropping this future part-way through a
/// frame appears to lose the bytes already consumed; callers should poll it
/// to completion.
pub async fn read_message<R: AsyncRead + Unpin>(reader: &mut R) -> io::Result<Option<Message>> {
    let Some(header) = read_header(reader).await? else {
        return Ok(None);
    };
    let size = u32::from_be_bytes(header) as usize;
    // reject oversized frames before allocating the body buffer
    if size > MAX_MESSAGE_BYTES {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            format!("agent message exceeded {MAX_MESSAGE_BYTES} bytes"),
        ));
    }

    let mut data = vec![0; size];
    reader.read_exact(&mut data).await?;
    let msg = Message::decode(&data[..])
        .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?;

    if let Err(err) = prost_protovalidate::validate(&msg) {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            format!("validate agent message: {err}"),
        ));
    }

    Ok(Some(msg))
}

/// Reads the 4-byte frame header.
///
/// Distinguishes a clean end of stream (zero bytes read, `Ok(None)`) from a
/// stream that ends inside the header (`UnexpectedEof`). Interrupted reads
/// are retried.
async fn read_header<R: AsyncRead + Unpin>(reader: &mut R) -> io::Result<Option<[u8; 4]>> {
    let mut header = [0; 4];
    let mut read = 0;
    while read < header.len() {
        match reader.read(&mut header[read..]).await {
            Ok(0) if read == 0 => return Ok(None),
            Ok(0) => {
                return Err(io::Error::new(
                    io::ErrorKind::UnexpectedEof,
                    "partial agent message header",
                ));
            }
            Ok(n) => read += n,
            Err(err) if err.kind() == io::ErrorKind::Interrupted => {}
            Err(err) => return Err(err),
        }
    }
    Ok(Some(header))
}

#[cfg(test)]
mod tests {
    use super::*;

    // a message written with write_message decodes to the same payload
    #[tokio::test]
    async fn round_trips_protobuf_message() {
        let msg = Message {
            id: "built-paths".to_owned(),
            built_paths: Some(v1::BuiltPaths {
                paths: vec!["/nix/store/abc-package".to_owned()],
                reason: "post_build_hook".to_owned(),
            }),
            ..Default::default()
        };

        let mut encoded = Vec::new();
        write_message(&mut encoded, &msg).await.unwrap();

        let decoded = read_message(&mut &encoded[..]).await.unwrap().unwrap();
        assert!(decoded.built_paths.is_some());
        if let Some(p) = decoded.built_paths {
            assert_eq!(p.paths, ["/nix/store/abc-package"]);
            assert_eq!(p.reason, "post_build_hook");
        }
    }

    // validation accepts exactly one payload field and rejects zero or many
    #[test]
    fn validates_messages() {
        // 1. valid message (exactly one field set)
        let valid = Message {
            id: "test-1".to_owned(),
            hello: Some(v1::Hello::default()),
            ..Default::default()
        };
        assert!(prost_protovalidate::validate(&valid).is_ok());

        // 2. invalid message (zero fields set)
        let invalid_zero = Message {
            id: "test-2".to_owned(),
            ..Default::default()
        };
        assert!(prost_protovalidate::validate(&invalid_zero).is_err());

        // 3. invalid message (multiple fields set)
        let invalid_multi = Message {
            id: "test-3".to_owned(),
            hello: Some(v1::Hello::default()),
            init: Some(v1::Init::default()),
            ..Default::default()
        };
        assert!(prost_protovalidate::validate(&invalid_multi).is_err());
    }
}
+244
shuttle/src/session.rs
··· 1 + use crate::cache::{CacheUploadManager, ReadCacheProxy, WriteCacheProxy}; 2 + use crate::command::Spec; 3 + use crate::dns_proxy::DnsProxy; 4 + use crate::exec; 5 + use crate::nix_config::{self, SYSTEMCTL_EXECUTABLE}; 6 + use crate::on_payload; 7 + use crate::protocol::{self, Message, v1}; 8 + use crate::{activation, command}; 9 + use anyhow::{Context, Result, bail}; 10 + use std::time::Duration; 11 + use tokio::io::{AsyncWrite, BufReader}; 12 + use tokio::sync::mpsc::{self, Sender}; 13 + use tokio::task::{JoinError, JoinSet}; 14 + use tokio_vsock::{VsockAddr, VsockStream}; 15 + use tracing::{info, warn}; 16 + 17 + pub async fn run(host_cid: u32, port: u32) -> Result<()> { 18 + let mut conn = VsockStream::connect(VsockAddr::new(host_cid, port)) 19 + .await 20 + .with_context(|| format!("dial host vsock cid={host_cid} port={port}"))?; 21 + 22 + send_hello(&mut conn).await?; 23 + 24 + let (reader_conn, writer_conn) = tokio::io::split(conn); 25 + let (out_tx, out_rx) = mpsc::channel::<Message>(256); 26 + let writer = tokio::spawn(async move { writer_loop(writer_conn, out_rx).await }); 27 + let mut reader = BufReader::new(reader_conn); 28 + 29 + let init = match protocol::read_message(&mut reader).await? { 30 + Some(Message { 31 + init: Some(init), .. 
32 + }) => init, 33 + Some(other) => bail!("expected init, got {}", protocol::kind(&other)), 34 + None => bail!("read init: EOF"), 35 + }; 36 + info!(job_id = %init.job_id, "received init"); 37 + 38 + let read_proxy = ReadCacheProxy::start(host_cid, init.cache_read_proxy_port) 39 + .await 40 + .context("start read cache proxy")?; 41 + let write_proxy = WriteCacheProxy::start(host_cid, init.cache_upload_proxy_port) 42 + .await 43 + .context("start write cache proxy")?; 44 + let _dns_proxy = DnsProxy::start(host_cid, init.dns_proxy_port) 45 + .await 46 + .context("start dns proxy")?; 47 + let _cache_cfg = nix_config::configure( 48 + &init, 49 + read_proxy.as_ref().map(ReadCacheProxy::url).unwrap_or(""), 50 + ) 51 + .await 52 + .context("configure nix cache")?; 53 + let uploader = CacheUploadManager::start( 54 + write_proxy.as_ref().map(WriteCacheProxy::url).unwrap_or(""), 55 + out_tx.clone(), 56 + ) 57 + .await 58 + .context("start cache upload manager")?; 59 + 60 + let mut tasks = JoinSet::new(); 61 + let read_result: Result<()> = loop { 62 + tokio::select! 
{ 63 + read = protocol::read_message(&mut reader) => match read { 64 + Ok(Some(msg)) => spawn_message_task(&mut tasks, msg, &out_tx, uploader.clone()), 65 + Ok(None) => break Ok(()), 66 + Err(error) => break Err(error).context("read message"), 67 + }, 68 + Some(result) = tasks.join_next(), if !tasks.is_empty() => { 69 + log_task_result(result, false); 70 + } 71 + } 72 + }; 73 + 74 + tasks.abort_all(); 75 + while let Some(result) = tasks.join_next().await { 76 + log_task_result(result, true); 77 + } 78 + 79 + drop(out_tx); 80 + let _ = writer.await; 81 + read_result?; 82 + Ok(()) 83 + } 84 + 85 + fn spawn_message_task( 86 + tasks: &mut JoinSet<()>, 87 + msg: Message, 88 + out_tx: &Sender<Message>, 89 + uploader: Option<CacheUploadManager>, 90 + ) { 91 + let kind = protocol::kind(&msg); 92 + let handle = on_payload!(msg, { 93 + activate_config => tasks.spawn(activation::run(msg.id, activate_config, out_tx.clone())), 94 + exec_start => tasks.spawn(exec::run(msg.id, exec_start, out_tx.clone())), 95 + cache_drain => tasks.spawn(run_cache_drain(msg.id, cache_drain, out_tx.clone(), uploader)), 96 + poweroff => tasks.spawn(run_poweroff(msg.id, poweroff, out_tx.clone())), 97 + }); 98 + if handle.is_none() { 99 + warn!(kind, "ignoring unsupported message"); 100 + } 101 + } 102 + 103 + fn log_task_result(result: Result<(), JoinError>, shutting_down: bool) { 104 + match result { 105 + Ok(()) => {} 106 + Err(error) if shutting_down && error.is_cancelled() => {} 107 + Err(error) => warn!(%error, "session handler task failed"), 108 + } 109 + } 110 + 111 + async fn writer_loop<W>(mut conn: W, mut rx: mpsc::Receiver<Message>) 112 + where 113 + W: AsyncWrite + Unpin, 114 + { 115 + while let Some(msg) = rx.recv().await { 116 + if let Err(error) = protocol::write_message(&mut conn, &msg).await { 117 + warn!(%error, "failed to write protocol message"); 118 + break; 119 + } 120 + } 121 + } 122 + 123 + async fn send_hello(conn: &mut VsockStream) -> Result<()> { 124 + let boot_id = 
tokio::fs::read_to_string("/proc/sys/kernel/random/boot_id") 125 + .await 126 + .unwrap_or_default() 127 + .trim() 128 + .to_owned(); 129 + let nix_version = nix_config::nix_version().await; 130 + 131 + let hello_payload = v1::Hello { 132 + protocol_version: protocol::PROTOCOL_VERSION, 133 + agent_version: env!("CARGO_PKG_VERSION").to_string(), 134 + boot_id: boot_id.clone(), 135 + nix_version: nix_version.clone(), 136 + }; 137 + info!( 138 + protocol = hello_payload.protocol_version, 139 + version = %hello_payload.agent_version, 140 + boot = %hello_payload.boot_id, 141 + nix = %hello_payload.nix_version, 142 + "sent hello" 143 + ); 144 + let hello = Message { 145 + id: "hello".to_owned(), 146 + hello: Some(hello_payload), 147 + ..Default::default() 148 + }; 149 + 150 + protocol::write_message(conn, &hello) 151 + .await 152 + .context("send hello")?; 153 + Ok(()) 154 + } 155 + 156 + async fn run_cache_drain( 157 + id: String, 158 + req: v1::CacheDrain, 159 + out: Sender<Message>, 160 + uploader: Option<CacheUploadManager>, 161 + ) { 162 + let timeout = 163 + (req.timeout_seconds > 0).then(|| Duration::from_secs(u64::from(req.timeout_seconds))); 164 + let stats = match uploader.as_ref() { 165 + Some(uploader) => uploader.drain(timeout).await, 166 + None => Default::default(), 167 + }; 168 + 169 + if let Some(error) = &stats.last_error { 170 + warn!( 171 + %id, 172 + pending = stats.pending, 173 + active = stats.active, 174 + uploaded = stats.uploaded, 175 + failed = stats.failed, 176 + %error, 177 + "cache drain completed with error" 178 + ); 179 + } else { 180 + info!( 181 + %id, 182 + uploaded = stats.uploaded, 183 + failed = stats.failed, 184 + "cache drain completed" 185 + ); 186 + } 187 + 188 + let result = Message { 189 + id, 190 + cache_drain_result: Some(v1::CacheDrainResult { 191 + error: protocol::error_or_empty(stats.last_error), 192 + cache_queued: stats.pending, 193 + cache_active: stats.active, 194 + cache_uploaded: stats.uploaded, 195 + cache_failed: 
stats.failed, 196 + }), 197 + ..Default::default() 198 + }; 199 + let _ = out.send(result).await; 200 + } 201 + 202 + async fn run_poweroff(id: String, _req: v1::Poweroff, out: Sender<Message>) { 203 + let result = Message { 204 + id, 205 + poweroff_result: Some(v1::PoweroffResult { 206 + error: String::new(), 207 + }), 208 + ..Default::default() 209 + }; 210 + let _ = out.send(result).await; 211 + 212 + tokio::spawn(async move { 213 + tokio::time::sleep(Duration::from_millis(100)).await; 214 + 215 + // prefer a clean shutdown through the init system when one is around 216 + // (systemd on NixOS, busybox/openrc elsewhere), then fall back to the 217 + // raw reboot(2) syscall on minimal guests 218 + for poweroff in [SYSTEMCTL_EXECUTABLE, "/sbin/poweroff", "/usr/sbin/poweroff"] { 219 + if !std::path::Path::new(poweroff).exists() { 220 + continue; 221 + } 222 + let mut spec = Spec::new(poweroff).timeout(Duration::from_secs(5)); 223 + if poweroff == SYSTEMCTL_EXECUTABLE { 224 + spec = spec.args(["poweroff"]); 225 + } 226 + match command::run_capture(spec).await { 227 + Ok(output) if output.success() => return, 228 + Ok(output) => warn!( 229 + %poweroff, 230 + exit_code = output.exit.exit_code, 231 + error = ?output.exit.error, 232 + output = %output.combined_lossy(), 233 + "poweroff command failed" 234 + ), 235 + Err(error) => warn!(%poweroff, %error, "poweroff command failed"), 236 + } 237 + } 238 + 239 + // only ever returns on failure 240 + let error = 241 + nix::sys::reboot::reboot(nix::sys::reboot::RebootMode::RB_POWER_OFF).unwrap_err(); 242 + warn!(%error, "reboot(RB_POWER_OFF) syscall failed"); 243 + }); 244 + }
+1102
spindle/agentproto/gen/agent.pb.go
··· 1 + // Code generated by protoc-gen-go. DO NOT EDIT. 2 + // versions: 3 + // protoc-gen-go v1.36.11 4 + // protoc (unknown) 5 + // source: spindle/agent/v1/agent.proto 6 + 7 + package agentv1 8 + 9 + import ( 10 + _ "buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go/buf/validate" 11 + protoreflect "google.golang.org/protobuf/reflect/protoreflect" 12 + protoimpl "google.golang.org/protobuf/runtime/protoimpl" 13 + reflect "reflect" 14 + sync "sync" 15 + unsafe "unsafe" 16 + ) 17 + 18 + const ( 19 + // Verify that this generated code is sufficiently up-to-date. 20 + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) 21 + // Verify that runtime/protoimpl is sufficiently up-to-date. 22 + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) 23 + ) 24 + 25 + type Hello struct { 26 + state protoimpl.MessageState `protogen:"open.v1"` 27 + ProtocolVersion uint32 `protobuf:"varint,1,opt,name=protocol_version,json=protocolVersion,proto3" json:"protocol_version,omitempty"` 28 + AgentVersion string `protobuf:"bytes,2,opt,name=agent_version,json=agentVersion,proto3" json:"agent_version,omitempty"` 29 + BootId string `protobuf:"bytes,3,opt,name=boot_id,json=bootId,proto3" json:"boot_id,omitempty"` 30 + NixVersion string `protobuf:"bytes,4,opt,name=nix_version,json=nixVersion,proto3" json:"nix_version,omitempty"` 31 + unknownFields protoimpl.UnknownFields 32 + sizeCache protoimpl.SizeCache 33 + } 34 + 35 + func (x *Hello) Reset() { 36 + *x = Hello{} 37 + mi := &file_spindle_agent_v1_agent_proto_msgTypes[0] 38 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 39 + ms.StoreMessageInfo(mi) 40 + } 41 + 42 + func (x *Hello) String() string { 43 + return protoimpl.X.MessageStringOf(x) 44 + } 45 + 46 + func (*Hello) ProtoMessage() {} 47 + 48 + func (x *Hello) ProtoReflect() protoreflect.Message { 49 + mi := &file_spindle_agent_v1_agent_proto_msgTypes[0] 50 + if x != nil { 51 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 52 + if ms.LoadMessageInfo() 
== nil { 53 + ms.StoreMessageInfo(mi) 54 + } 55 + return ms 56 + } 57 + return mi.MessageOf(x) 58 + } 59 + 60 + // Deprecated: Use Hello.ProtoReflect.Descriptor instead. 61 + func (*Hello) Descriptor() ([]byte, []int) { 62 + return file_spindle_agent_v1_agent_proto_rawDescGZIP(), []int{0} 63 + } 64 + 65 + func (x *Hello) GetProtocolVersion() uint32 { 66 + if x != nil { 67 + return x.ProtocolVersion 68 + } 69 + return 0 70 + } 71 + 72 + func (x *Hello) GetAgentVersion() string { 73 + if x != nil { 74 + return x.AgentVersion 75 + } 76 + return "" 77 + } 78 + 79 + func (x *Hello) GetBootId() string { 80 + if x != nil { 81 + return x.BootId 82 + } 83 + return "" 84 + } 85 + 86 + func (x *Hello) GetNixVersion() string { 87 + if x != nil { 88 + return x.NixVersion 89 + } 90 + return "" 91 + } 92 + 93 + type Init struct { 94 + state protoimpl.MessageState `protogen:"open.v1"` 95 + JobId string `protobuf:"bytes,1,opt,name=job_id,json=jobId,proto3" json:"job_id,omitempty"` 96 + CacheTrustedPublicKeys []string `protobuf:"bytes,2,rep,name=cache_trusted_public_keys,json=cacheTrustedPublicKeys,proto3" json:"cache_trusted_public_keys,omitempty"` 97 + CacheReadProxyPort uint32 `protobuf:"varint,3,opt,name=cache_read_proxy_port,json=cacheReadProxyPort,proto3" json:"cache_read_proxy_port,omitempty"` 98 + CacheUploadProxyPort uint32 `protobuf:"varint,4,opt,name=cache_upload_proxy_port,json=cacheUploadProxyPort,proto3" json:"cache_upload_proxy_port,omitempty"` 99 + DnsProxyPort uint32 `protobuf:"varint,5,opt,name=dns_proxy_port,json=dnsProxyPort,proto3" json:"dns_proxy_port,omitempty"` 100 + unknownFields protoimpl.UnknownFields 101 + sizeCache protoimpl.SizeCache 102 + } 103 + 104 + func (x *Init) Reset() { 105 + *x = Init{} 106 + mi := &file_spindle_agent_v1_agent_proto_msgTypes[1] 107 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 108 + ms.StoreMessageInfo(mi) 109 + } 110 + 111 + func (x *Init) String() string { 112 + return protoimpl.X.MessageStringOf(x) 113 + } 114 + 
115 + func (*Init) ProtoMessage() {} 116 + 117 + func (x *Init) ProtoReflect() protoreflect.Message { 118 + mi := &file_spindle_agent_v1_agent_proto_msgTypes[1] 119 + if x != nil { 120 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 121 + if ms.LoadMessageInfo() == nil { 122 + ms.StoreMessageInfo(mi) 123 + } 124 + return ms 125 + } 126 + return mi.MessageOf(x) 127 + } 128 + 129 + // Deprecated: Use Init.ProtoReflect.Descriptor instead. 130 + func (*Init) Descriptor() ([]byte, []int) { 131 + return file_spindle_agent_v1_agent_proto_rawDescGZIP(), []int{1} 132 + } 133 + 134 + func (x *Init) GetJobId() string { 135 + if x != nil { 136 + return x.JobId 137 + } 138 + return "" 139 + } 140 + 141 + func (x *Init) GetCacheTrustedPublicKeys() []string { 142 + if x != nil { 143 + return x.CacheTrustedPublicKeys 144 + } 145 + return nil 146 + } 147 + 148 + func (x *Init) GetCacheReadProxyPort() uint32 { 149 + if x != nil { 150 + return x.CacheReadProxyPort 151 + } 152 + return 0 153 + } 154 + 155 + func (x *Init) GetCacheUploadProxyPort() uint32 { 156 + if x != nil { 157 + return x.CacheUploadProxyPort 158 + } 159 + return 0 160 + } 161 + 162 + func (x *Init) GetDnsProxyPort() uint32 { 163 + if x != nil { 164 + return x.DnsProxyPort 165 + } 166 + return 0 167 + } 168 + 169 + type ExecStart struct { 170 + state protoimpl.MessageState `protogen:"open.v1"` 171 + Argv []string `protobuf:"bytes,1,rep,name=argv,proto3" json:"argv,omitempty"` 172 + Env []string `protobuf:"bytes,2,rep,name=env,proto3" json:"env,omitempty"` 173 + Cwd string `protobuf:"bytes,3,opt,name=cwd,proto3" json:"cwd,omitempty"` 174 + User string `protobuf:"bytes,4,opt,name=user,proto3" json:"user,omitempty"` 175 + TimeoutSeconds uint32 `protobuf:"varint,5,opt,name=timeout_seconds,json=timeoutSeconds,proto3" json:"timeout_seconds,omitempty"` 176 + unknownFields protoimpl.UnknownFields 177 + sizeCache protoimpl.SizeCache 178 + } 179 + 180 + func (x *ExecStart) Reset() { 181 + *x = ExecStart{} 182 + mi := 
&file_spindle_agent_v1_agent_proto_msgTypes[2] 183 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 184 + ms.StoreMessageInfo(mi) 185 + } 186 + 187 + func (x *ExecStart) String() string { 188 + return protoimpl.X.MessageStringOf(x) 189 + } 190 + 191 + func (*ExecStart) ProtoMessage() {} 192 + 193 + func (x *ExecStart) ProtoReflect() protoreflect.Message { 194 + mi := &file_spindle_agent_v1_agent_proto_msgTypes[2] 195 + if x != nil { 196 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 197 + if ms.LoadMessageInfo() == nil { 198 + ms.StoreMessageInfo(mi) 199 + } 200 + return ms 201 + } 202 + return mi.MessageOf(x) 203 + } 204 + 205 + // Deprecated: Use ExecStart.ProtoReflect.Descriptor instead. 206 + func (*ExecStart) Descriptor() ([]byte, []int) { 207 + return file_spindle_agent_v1_agent_proto_rawDescGZIP(), []int{2} 208 + } 209 + 210 + func (x *ExecStart) GetArgv() []string { 211 + if x != nil { 212 + return x.Argv 213 + } 214 + return nil 215 + } 216 + 217 + func (x *ExecStart) GetEnv() []string { 218 + if x != nil { 219 + return x.Env 220 + } 221 + return nil 222 + } 223 + 224 + func (x *ExecStart) GetCwd() string { 225 + if x != nil { 226 + return x.Cwd 227 + } 228 + return "" 229 + } 230 + 231 + func (x *ExecStart) GetUser() string { 232 + if x != nil { 233 + return x.User 234 + } 235 + return "" 236 + } 237 + 238 + func (x *ExecStart) GetTimeoutSeconds() uint32 { 239 + if x != nil { 240 + return x.TimeoutSeconds 241 + } 242 + return 0 243 + } 244 + 245 + type ExecStdout struct { 246 + state protoimpl.MessageState `protogen:"open.v1"` 247 + Data string `protobuf:"bytes,1,opt,name=data,proto3" json:"data,omitempty"` 248 + unknownFields protoimpl.UnknownFields 249 + sizeCache protoimpl.SizeCache 250 + } 251 + 252 + func (x *ExecStdout) Reset() { 253 + *x = ExecStdout{} 254 + mi := &file_spindle_agent_v1_agent_proto_msgTypes[3] 255 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 256 + ms.StoreMessageInfo(mi) 257 + } 258 + 259 + func (x 
*ExecStdout) String() string { 260 + return protoimpl.X.MessageStringOf(x) 261 + } 262 + 263 + func (*ExecStdout) ProtoMessage() {} 264 + 265 + func (x *ExecStdout) ProtoReflect() protoreflect.Message { 266 + mi := &file_spindle_agent_v1_agent_proto_msgTypes[3] 267 + if x != nil { 268 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 269 + if ms.LoadMessageInfo() == nil { 270 + ms.StoreMessageInfo(mi) 271 + } 272 + return ms 273 + } 274 + return mi.MessageOf(x) 275 + } 276 + 277 + // Deprecated: Use ExecStdout.ProtoReflect.Descriptor instead. 278 + func (*ExecStdout) Descriptor() ([]byte, []int) { 279 + return file_spindle_agent_v1_agent_proto_rawDescGZIP(), []int{3} 280 + } 281 + 282 + func (x *ExecStdout) GetData() string { 283 + if x != nil { 284 + return x.Data 285 + } 286 + return "" 287 + } 288 + 289 + type ExecStderr struct { 290 + state protoimpl.MessageState `protogen:"open.v1"` 291 + Data string `protobuf:"bytes,1,opt,name=data,proto3" json:"data,omitempty"` 292 + unknownFields protoimpl.UnknownFields 293 + sizeCache protoimpl.SizeCache 294 + } 295 + 296 + func (x *ExecStderr) Reset() { 297 + *x = ExecStderr{} 298 + mi := &file_spindle_agent_v1_agent_proto_msgTypes[4] 299 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 300 + ms.StoreMessageInfo(mi) 301 + } 302 + 303 + func (x *ExecStderr) String() string { 304 + return protoimpl.X.MessageStringOf(x) 305 + } 306 + 307 + func (*ExecStderr) ProtoMessage() {} 308 + 309 + func (x *ExecStderr) ProtoReflect() protoreflect.Message { 310 + mi := &file_spindle_agent_v1_agent_proto_msgTypes[4] 311 + if x != nil { 312 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 313 + if ms.LoadMessageInfo() == nil { 314 + ms.StoreMessageInfo(mi) 315 + } 316 + return ms 317 + } 318 + return mi.MessageOf(x) 319 + } 320 + 321 + // Deprecated: Use ExecStderr.ProtoReflect.Descriptor instead. 
322 + func (*ExecStderr) Descriptor() ([]byte, []int) { 323 + return file_spindle_agent_v1_agent_proto_rawDescGZIP(), []int{4} 324 + } 325 + 326 + func (x *ExecStderr) GetData() string { 327 + if x != nil { 328 + return x.Data 329 + } 330 + return "" 331 + } 332 + 333 + type ExecExit struct { 334 + state protoimpl.MessageState `protogen:"open.v1"` 335 + ExitCode int32 `protobuf:"varint,1,opt,name=exit_code,json=exitCode,proto3" json:"exit_code,omitempty"` 336 + Error string `protobuf:"bytes,2,opt,name=error,proto3" json:"error,omitempty"` 337 + // set when the guest killed the step on its own timeout timer, so the host 338 + // can classify it as a timeout rather than inferring failure from exit_code. 339 + TimedOut bool `protobuf:"varint,3,opt,name=timed_out,json=timedOut,proto3" json:"timed_out,omitempty"` 340 + unknownFields protoimpl.UnknownFields 341 + sizeCache protoimpl.SizeCache 342 + } 343 + 344 + func (x *ExecExit) Reset() { 345 + *x = ExecExit{} 346 + mi := &file_spindle_agent_v1_agent_proto_msgTypes[5] 347 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 348 + ms.StoreMessageInfo(mi) 349 + } 350 + 351 + func (x *ExecExit) String() string { 352 + return protoimpl.X.MessageStringOf(x) 353 + } 354 + 355 + func (*ExecExit) ProtoMessage() {} 356 + 357 + func (x *ExecExit) ProtoReflect() protoreflect.Message { 358 + mi := &file_spindle_agent_v1_agent_proto_msgTypes[5] 359 + if x != nil { 360 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 361 + if ms.LoadMessageInfo() == nil { 362 + ms.StoreMessageInfo(mi) 363 + } 364 + return ms 365 + } 366 + return mi.MessageOf(x) 367 + } 368 + 369 + // Deprecated: Use ExecExit.ProtoReflect.Descriptor instead. 
370 + func (*ExecExit) Descriptor() ([]byte, []int) { 371 + return file_spindle_agent_v1_agent_proto_rawDescGZIP(), []int{5} 372 + } 373 + 374 + func (x *ExecExit) GetExitCode() int32 { 375 + if x != nil { 376 + return x.ExitCode 377 + } 378 + return 0 379 + } 380 + 381 + func (x *ExecExit) GetError() string { 382 + if x != nil { 383 + return x.Error 384 + } 385 + return "" 386 + } 387 + 388 + func (x *ExecExit) GetTimedOut() bool { 389 + if x != nil { 390 + return x.TimedOut 391 + } 392 + return false 393 + } 394 + 395 + type ActivateConfig struct { 396 + state protoimpl.MessageState `protogen:"open.v1"` 397 + ConfigKey string `protobuf:"bytes,1,opt,name=config_key,json=configKey,proto3" json:"config_key,omitempty"` 398 + BaseConfigHash string `protobuf:"bytes,2,opt,name=base_config_hash,json=baseConfigHash,proto3" json:"base_config_hash,omitempty"` 399 + UserConfig string `protobuf:"bytes,3,opt,name=user_config,json=userConfig,proto3" json:"user_config,omitempty"` 400 + Toplevel string `protobuf:"bytes,4,opt,name=toplevel,proto3" json:"toplevel,omitempty"` 401 + TimeoutSeconds uint32 `protobuf:"varint,5,opt,name=timeout_seconds,json=timeoutSeconds,proto3" json:"timeout_seconds,omitempty"` 402 + unknownFields protoimpl.UnknownFields 403 + sizeCache protoimpl.SizeCache 404 + } 405 + 406 + func (x *ActivateConfig) Reset() { 407 + *x = ActivateConfig{} 408 + mi := &file_spindle_agent_v1_agent_proto_msgTypes[6] 409 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 410 + ms.StoreMessageInfo(mi) 411 + } 412 + 413 + func (x *ActivateConfig) String() string { 414 + return protoimpl.X.MessageStringOf(x) 415 + } 416 + 417 + func (*ActivateConfig) ProtoMessage() {} 418 + 419 + func (x *ActivateConfig) ProtoReflect() protoreflect.Message { 420 + mi := &file_spindle_agent_v1_agent_proto_msgTypes[6] 421 + if x != nil { 422 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 423 + if ms.LoadMessageInfo() == nil { 424 + ms.StoreMessageInfo(mi) 425 + } 426 + return ms 
427 + } 428 + return mi.MessageOf(x) 429 + } 430 + 431 + // Deprecated: Use ActivateConfig.ProtoReflect.Descriptor instead. 432 + func (*ActivateConfig) Descriptor() ([]byte, []int) { 433 + return file_spindle_agent_v1_agent_proto_rawDescGZIP(), []int{6} 434 + } 435 + 436 + func (x *ActivateConfig) GetConfigKey() string { 437 + if x != nil { 438 + return x.ConfigKey 439 + } 440 + return "" 441 + } 442 + 443 + func (x *ActivateConfig) GetBaseConfigHash() string { 444 + if x != nil { 445 + return x.BaseConfigHash 446 + } 447 + return "" 448 + } 449 + 450 + func (x *ActivateConfig) GetUserConfig() string { 451 + if x != nil { 452 + return x.UserConfig 453 + } 454 + return "" 455 + } 456 + 457 + func (x *ActivateConfig) GetToplevel() string { 458 + if x != nil { 459 + return x.Toplevel 460 + } 461 + return "" 462 + } 463 + 464 + func (x *ActivateConfig) GetTimeoutSeconds() uint32 { 465 + if x != nil { 466 + return x.TimeoutSeconds 467 + } 468 + return 0 469 + } 470 + 471 + type ActivateConfigResult struct { 472 + state protoimpl.MessageState `protogen:"open.v1"` 473 + ConfigKey string `protobuf:"bytes,1,opt,name=config_key,json=configKey,proto3" json:"config_key,omitempty"` 474 + Toplevel string `protobuf:"bytes,2,opt,name=toplevel,proto3" json:"toplevel,omitempty"` 475 + Error string `protobuf:"bytes,3,opt,name=error,proto3" json:"error,omitempty"` 476 + unknownFields protoimpl.UnknownFields 477 + sizeCache protoimpl.SizeCache 478 + } 479 + 480 + func (x *ActivateConfigResult) Reset() { 481 + *x = ActivateConfigResult{} 482 + mi := &file_spindle_agent_v1_agent_proto_msgTypes[7] 483 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 484 + ms.StoreMessageInfo(mi) 485 + } 486 + 487 + func (x *ActivateConfigResult) String() string { 488 + return protoimpl.X.MessageStringOf(x) 489 + } 490 + 491 + func (*ActivateConfigResult) ProtoMessage() {} 492 + 493 + func (x *ActivateConfigResult) ProtoReflect() protoreflect.Message { 494 + mi := 
&file_spindle_agent_v1_agent_proto_msgTypes[7] 495 + if x != nil { 496 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 497 + if ms.LoadMessageInfo() == nil { 498 + ms.StoreMessageInfo(mi) 499 + } 500 + return ms 501 + } 502 + return mi.MessageOf(x) 503 + } 504 + 505 + // Deprecated: Use ActivateConfigResult.ProtoReflect.Descriptor instead. 506 + func (*ActivateConfigResult) Descriptor() ([]byte, []int) { 507 + return file_spindle_agent_v1_agent_proto_rawDescGZIP(), []int{7} 508 + } 509 + 510 + func (x *ActivateConfigResult) GetConfigKey() string { 511 + if x != nil { 512 + return x.ConfigKey 513 + } 514 + return "" 515 + } 516 + 517 + func (x *ActivateConfigResult) GetToplevel() string { 518 + if x != nil { 519 + return x.Toplevel 520 + } 521 + return "" 522 + } 523 + 524 + func (x *ActivateConfigResult) GetError() string { 525 + if x != nil { 526 + return x.Error 527 + } 528 + return "" 529 + } 530 + 531 + type BuiltPaths struct { 532 + state protoimpl.MessageState `protogen:"open.v1"` 533 + Paths []string `protobuf:"bytes,1,rep,name=paths,proto3" json:"paths,omitempty"` 534 + Reason string `protobuf:"bytes,2,opt,name=reason,proto3" json:"reason,omitempty"` 535 + unknownFields protoimpl.UnknownFields 536 + sizeCache protoimpl.SizeCache 537 + } 538 + 539 + func (x *BuiltPaths) Reset() { 540 + *x = BuiltPaths{} 541 + mi := &file_spindle_agent_v1_agent_proto_msgTypes[8] 542 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 543 + ms.StoreMessageInfo(mi) 544 + } 545 + 546 + func (x *BuiltPaths) String() string { 547 + return protoimpl.X.MessageStringOf(x) 548 + } 549 + 550 + func (*BuiltPaths) ProtoMessage() {} 551 + 552 + func (x *BuiltPaths) ProtoReflect() protoreflect.Message { 553 + mi := &file_spindle_agent_v1_agent_proto_msgTypes[8] 554 + if x != nil { 555 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 556 + if ms.LoadMessageInfo() == nil { 557 + ms.StoreMessageInfo(mi) 558 + } 559 + return ms 560 + } 561 + return mi.MessageOf(x) 562 + } 
563 + 564 + // Deprecated: Use BuiltPaths.ProtoReflect.Descriptor instead. 565 + func (*BuiltPaths) Descriptor() ([]byte, []int) { 566 + return file_spindle_agent_v1_agent_proto_rawDescGZIP(), []int{8} 567 + } 568 + 569 + func (x *BuiltPaths) GetPaths() []string { 570 + if x != nil { 571 + return x.Paths 572 + } 573 + return nil 574 + } 575 + 576 + func (x *BuiltPaths) GetReason() string { 577 + if x != nil { 578 + return x.Reason 579 + } 580 + return "" 581 + } 582 + 583 + type CacheDrain struct { 584 + state protoimpl.MessageState `protogen:"open.v1"` 585 + TimeoutSeconds uint32 `protobuf:"varint,1,opt,name=timeout_seconds,json=timeoutSeconds,proto3" json:"timeout_seconds,omitempty"` 586 + unknownFields protoimpl.UnknownFields 587 + sizeCache protoimpl.SizeCache 588 + } 589 + 590 + func (x *CacheDrain) Reset() { 591 + *x = CacheDrain{} 592 + mi := &file_spindle_agent_v1_agent_proto_msgTypes[9] 593 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 594 + ms.StoreMessageInfo(mi) 595 + } 596 + 597 + func (x *CacheDrain) String() string { 598 + return protoimpl.X.MessageStringOf(x) 599 + } 600 + 601 + func (*CacheDrain) ProtoMessage() {} 602 + 603 + func (x *CacheDrain) ProtoReflect() protoreflect.Message { 604 + mi := &file_spindle_agent_v1_agent_proto_msgTypes[9] 605 + if x != nil { 606 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 607 + if ms.LoadMessageInfo() == nil { 608 + ms.StoreMessageInfo(mi) 609 + } 610 + return ms 611 + } 612 + return mi.MessageOf(x) 613 + } 614 + 615 + // Deprecated: Use CacheDrain.ProtoReflect.Descriptor instead. 
616 + func (*CacheDrain) Descriptor() ([]byte, []int) { 617 + return file_spindle_agent_v1_agent_proto_rawDescGZIP(), []int{9} 618 + } 619 + 620 + func (x *CacheDrain) GetTimeoutSeconds() uint32 { 621 + if x != nil { 622 + return x.TimeoutSeconds 623 + } 624 + return 0 625 + } 626 + 627 + type CacheDrainResult struct { 628 + state protoimpl.MessageState `protogen:"open.v1"` 629 + Error string `protobuf:"bytes,1,opt,name=error,proto3" json:"error,omitempty"` 630 + CacheQueued uint32 `protobuf:"varint,2,opt,name=cache_queued,json=cacheQueued,proto3" json:"cache_queued,omitempty"` 631 + CacheActive uint32 `protobuf:"varint,3,opt,name=cache_active,json=cacheActive,proto3" json:"cache_active,omitempty"` 632 + CacheUploaded uint32 `protobuf:"varint,4,opt,name=cache_uploaded,json=cacheUploaded,proto3" json:"cache_uploaded,omitempty"` 633 + CacheFailed uint32 `protobuf:"varint,5,opt,name=cache_failed,json=cacheFailed,proto3" json:"cache_failed,omitempty"` 634 + unknownFields protoimpl.UnknownFields 635 + sizeCache protoimpl.SizeCache 636 + } 637 + 638 + func (x *CacheDrainResult) Reset() { 639 + *x = CacheDrainResult{} 640 + mi := &file_spindle_agent_v1_agent_proto_msgTypes[10] 641 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 642 + ms.StoreMessageInfo(mi) 643 + } 644 + 645 + func (x *CacheDrainResult) String() string { 646 + return protoimpl.X.MessageStringOf(x) 647 + } 648 + 649 + func (*CacheDrainResult) ProtoMessage() {} 650 + 651 + func (x *CacheDrainResult) ProtoReflect() protoreflect.Message { 652 + mi := &file_spindle_agent_v1_agent_proto_msgTypes[10] 653 + if x != nil { 654 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 655 + if ms.LoadMessageInfo() == nil { 656 + ms.StoreMessageInfo(mi) 657 + } 658 + return ms 659 + } 660 + return mi.MessageOf(x) 661 + } 662 + 663 + // Deprecated: Use CacheDrainResult.ProtoReflect.Descriptor instead. 
664 + func (*CacheDrainResult) Descriptor() ([]byte, []int) { 665 + return file_spindle_agent_v1_agent_proto_rawDescGZIP(), []int{10} 666 + } 667 + 668 + func (x *CacheDrainResult) GetError() string { 669 + if x != nil { 670 + return x.Error 671 + } 672 + return "" 673 + } 674 + 675 + func (x *CacheDrainResult) GetCacheQueued() uint32 { 676 + if x != nil { 677 + return x.CacheQueued 678 + } 679 + return 0 680 + } 681 + 682 + func (x *CacheDrainResult) GetCacheActive() uint32 { 683 + if x != nil { 684 + return x.CacheActive 685 + } 686 + return 0 687 + } 688 + 689 + func (x *CacheDrainResult) GetCacheUploaded() uint32 { 690 + if x != nil { 691 + return x.CacheUploaded 692 + } 693 + return 0 694 + } 695 + 696 + func (x *CacheDrainResult) GetCacheFailed() uint32 { 697 + if x != nil { 698 + return x.CacheFailed 699 + } 700 + return 0 701 + } 702 + 703 + type Poweroff struct { 704 + state protoimpl.MessageState `protogen:"open.v1"` 705 + unknownFields protoimpl.UnknownFields 706 + sizeCache protoimpl.SizeCache 707 + } 708 + 709 + func (x *Poweroff) Reset() { 710 + *x = Poweroff{} 711 + mi := &file_spindle_agent_v1_agent_proto_msgTypes[11] 712 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 713 + ms.StoreMessageInfo(mi) 714 + } 715 + 716 + func (x *Poweroff) String() string { 717 + return protoimpl.X.MessageStringOf(x) 718 + } 719 + 720 + func (*Poweroff) ProtoMessage() {} 721 + 722 + func (x *Poweroff) ProtoReflect() protoreflect.Message { 723 + mi := &file_spindle_agent_v1_agent_proto_msgTypes[11] 724 + if x != nil { 725 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 726 + if ms.LoadMessageInfo() == nil { 727 + ms.StoreMessageInfo(mi) 728 + } 729 + return ms 730 + } 731 + return mi.MessageOf(x) 732 + } 733 + 734 + // Deprecated: Use Poweroff.ProtoReflect.Descriptor instead. 
735 + func (*Poweroff) Descriptor() ([]byte, []int) { 736 + return file_spindle_agent_v1_agent_proto_rawDescGZIP(), []int{11} 737 + } 738 + 739 + type PoweroffResult struct { 740 + state protoimpl.MessageState `protogen:"open.v1"` 741 + Error string `protobuf:"bytes,1,opt,name=error,proto3" json:"error,omitempty"` 742 + unknownFields protoimpl.UnknownFields 743 + sizeCache protoimpl.SizeCache 744 + } 745 + 746 + func (x *PoweroffResult) Reset() { 747 + *x = PoweroffResult{} 748 + mi := &file_spindle_agent_v1_agent_proto_msgTypes[12] 749 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 750 + ms.StoreMessageInfo(mi) 751 + } 752 + 753 + func (x *PoweroffResult) String() string { 754 + return protoimpl.X.MessageStringOf(x) 755 + } 756 + 757 + func (*PoweroffResult) ProtoMessage() {} 758 + 759 + func (x *PoweroffResult) ProtoReflect() protoreflect.Message { 760 + mi := &file_spindle_agent_v1_agent_proto_msgTypes[12] 761 + if x != nil { 762 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 763 + if ms.LoadMessageInfo() == nil { 764 + ms.StoreMessageInfo(mi) 765 + } 766 + return ms 767 + } 768 + return mi.MessageOf(x) 769 + } 770 + 771 + // Deprecated: Use PoweroffResult.ProtoReflect.Descriptor instead. 
772 + func (*PoweroffResult) Descriptor() ([]byte, []int) { 773 + return file_spindle_agent_v1_agent_proto_rawDescGZIP(), []int{12} 774 + } 775 + 776 + func (x *PoweroffResult) GetError() string { 777 + if x != nil { 778 + return x.Error 779 + } 780 + return "" 781 + } 782 + 783 + type Message struct { 784 + state protoimpl.MessageState `protogen:"open.v1"` 785 + Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` 786 + Hello *Hello `protobuf:"bytes,2,opt,name=hello,proto3" json:"hello,omitempty"` 787 + Init *Init `protobuf:"bytes,3,opt,name=init,proto3" json:"init,omitempty"` 788 + ExecStart *ExecStart `protobuf:"bytes,4,opt,name=exec_start,json=execStart,proto3" json:"exec_start,omitempty"` 789 + ExecStdout *ExecStdout `protobuf:"bytes,5,opt,name=exec_stdout,json=execStdout,proto3" json:"exec_stdout,omitempty"` 790 + ExecStderr *ExecStderr `protobuf:"bytes,6,opt,name=exec_stderr,json=execStderr,proto3" json:"exec_stderr,omitempty"` 791 + ExecExit *ExecExit `protobuf:"bytes,7,opt,name=exec_exit,json=execExit,proto3" json:"exec_exit,omitempty"` 792 + ActivateConfig *ActivateConfig `protobuf:"bytes,8,opt,name=activate_config,json=activateConfig,proto3" json:"activate_config,omitempty"` 793 + ActivateConfigResult *ActivateConfigResult `protobuf:"bytes,9,opt,name=activate_config_result,json=activateConfigResult,proto3" json:"activate_config_result,omitempty"` 794 + BuiltPaths *BuiltPaths `protobuf:"bytes,10,opt,name=built_paths,json=builtPaths,proto3" json:"built_paths,omitempty"` 795 + CacheDrain *CacheDrain `protobuf:"bytes,11,opt,name=cache_drain,json=cacheDrain,proto3" json:"cache_drain,omitempty"` 796 + CacheDrainResult *CacheDrainResult `protobuf:"bytes,12,opt,name=cache_drain_result,json=cacheDrainResult,proto3" json:"cache_drain_result,omitempty"` 797 + Poweroff *Poweroff `protobuf:"bytes,13,opt,name=poweroff,proto3" json:"poweroff,omitempty"` 798 + PoweroffResult *PoweroffResult 
`protobuf:"bytes,14,opt,name=poweroff_result,json=poweroffResult,proto3" json:"poweroff_result,omitempty"` 799 + unknownFields protoimpl.UnknownFields 800 + sizeCache protoimpl.SizeCache 801 + } 802 + 803 + func (x *Message) Reset() { 804 + *x = Message{} 805 + mi := &file_spindle_agent_v1_agent_proto_msgTypes[13] 806 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 807 + ms.StoreMessageInfo(mi) 808 + } 809 + 810 + func (x *Message) String() string { 811 + return protoimpl.X.MessageStringOf(x) 812 + } 813 + 814 + func (*Message) ProtoMessage() {} 815 + 816 + func (x *Message) ProtoReflect() protoreflect.Message { 817 + mi := &file_spindle_agent_v1_agent_proto_msgTypes[13] 818 + if x != nil { 819 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 820 + if ms.LoadMessageInfo() == nil { 821 + ms.StoreMessageInfo(mi) 822 + } 823 + return ms 824 + } 825 + return mi.MessageOf(x) 826 + } 827 + 828 + // Deprecated: Use Message.ProtoReflect.Descriptor instead. 829 + func (*Message) Descriptor() ([]byte, []int) { 830 + return file_spindle_agent_v1_agent_proto_rawDescGZIP(), []int{13} 831 + } 832 + 833 + func (x *Message) GetId() string { 834 + if x != nil { 835 + return x.Id 836 + } 837 + return "" 838 + } 839 + 840 + func (x *Message) GetHello() *Hello { 841 + if x != nil { 842 + return x.Hello 843 + } 844 + return nil 845 + } 846 + 847 + func (x *Message) GetInit() *Init { 848 + if x != nil { 849 + return x.Init 850 + } 851 + return nil 852 + } 853 + 854 + func (x *Message) GetExecStart() *ExecStart { 855 + if x != nil { 856 + return x.ExecStart 857 + } 858 + return nil 859 + } 860 + 861 + func (x *Message) GetExecStdout() *ExecStdout { 862 + if x != nil { 863 + return x.ExecStdout 864 + } 865 + return nil 866 + } 867 + 868 + func (x *Message) GetExecStderr() *ExecStderr { 869 + if x != nil { 870 + return x.ExecStderr 871 + } 872 + return nil 873 + } 874 + 875 + func (x *Message) GetExecExit() *ExecExit { 876 + if x != nil { 877 + return x.ExecExit 878 + } 
879 + return nil 880 + } 881 + 882 + func (x *Message) GetActivateConfig() *ActivateConfig { 883 + if x != nil { 884 + return x.ActivateConfig 885 + } 886 + return nil 887 + } 888 + 889 + func (x *Message) GetActivateConfigResult() *ActivateConfigResult { 890 + if x != nil { 891 + return x.ActivateConfigResult 892 + } 893 + return nil 894 + } 895 + 896 + func (x *Message) GetBuiltPaths() *BuiltPaths { 897 + if x != nil { 898 + return x.BuiltPaths 899 + } 900 + return nil 901 + } 902 + 903 + func (x *Message) GetCacheDrain() *CacheDrain { 904 + if x != nil { 905 + return x.CacheDrain 906 + } 907 + return nil 908 + } 909 + 910 + func (x *Message) GetCacheDrainResult() *CacheDrainResult { 911 + if x != nil { 912 + return x.CacheDrainResult 913 + } 914 + return nil 915 + } 916 + 917 + func (x *Message) GetPoweroff() *Poweroff { 918 + if x != nil { 919 + return x.Poweroff 920 + } 921 + return nil 922 + } 923 + 924 + func (x *Message) GetPoweroffResult() *PoweroffResult { 925 + if x != nil { 926 + return x.PoweroffResult 927 + } 928 + return nil 929 + } 930 + 931 + var File_spindle_agent_v1_agent_proto protoreflect.FileDescriptor 932 + 933 + const file_spindle_agent_v1_agent_proto_rawDesc = "" + 934 + "\n" + 935 + "\x1cspindle/agent/v1/agent.proto\x12\x10spindle.agent.v1\x1a\x1bbuf/validate/validate.proto\"\x91\x01\n" + 936 + "\x05Hello\x12)\n" + 937 + "\x10protocol_version\x18\x01 \x01(\rR\x0fprotocolVersion\x12#\n" + 938 + "\ragent_version\x18\x02 \x01(\tR\fagentVersion\x12\x17\n" + 939 + "\aboot_id\x18\x03 \x01(\tR\x06bootId\x12\x1f\n" + 940 + "\vnix_version\x18\x04 \x01(\tR\n" + 941 + "nixVersion\"\xe8\x01\n" + 942 + "\x04Init\x12\x15\n" + 943 + "\x06job_id\x18\x01 \x01(\tR\x05jobId\x129\n" + 944 + "\x19cache_trusted_public_keys\x18\x02 \x03(\tR\x16cacheTrustedPublicKeys\x121\n" + 945 + "\x15cache_read_proxy_port\x18\x03 \x01(\rR\x12cacheReadProxyPort\x125\n" + 946 + "\x17cache_upload_proxy_port\x18\x04 \x01(\rR\x14cacheUploadProxyPort\x12$\n" + 947 + 
"\x0edns_proxy_port\x18\x05 \x01(\rR\fdnsProxyPort\"\x80\x01\n" + 948 + "\tExecStart\x12\x12\n" + 949 + "\x04argv\x18\x01 \x03(\tR\x04argv\x12\x10\n" + 950 + "\x03env\x18\x02 \x03(\tR\x03env\x12\x10\n" + 951 + "\x03cwd\x18\x03 \x01(\tR\x03cwd\x12\x12\n" + 952 + "\x04user\x18\x04 \x01(\tR\x04user\x12'\n" + 953 + "\x0ftimeout_seconds\x18\x05 \x01(\rR\x0etimeoutSeconds\" \n" + 954 + "\n" + 955 + "ExecStdout\x12\x12\n" + 956 + "\x04data\x18\x01 \x01(\tR\x04data\" \n" + 957 + "\n" + 958 + "ExecStderr\x12\x12\n" + 959 + "\x04data\x18\x01 \x01(\tR\x04data\"Z\n" + 960 + "\bExecExit\x12\x1b\n" + 961 + "\texit_code\x18\x01 \x01(\x05R\bexitCode\x12\x14\n" + 962 + "\x05error\x18\x02 \x01(\tR\x05error\x12\x1b\n" + 963 + "\ttimed_out\x18\x03 \x01(\bR\btimedOut\"\xbf\x01\n" + 964 + "\x0eActivateConfig\x12\x1d\n" + 965 + "\n" + 966 + "config_key\x18\x01 \x01(\tR\tconfigKey\x12(\n" + 967 + "\x10base_config_hash\x18\x02 \x01(\tR\x0ebaseConfigHash\x12\x1f\n" + 968 + "\vuser_config\x18\x03 \x01(\tR\n" + 969 + "userConfig\x12\x1a\n" + 970 + "\btoplevel\x18\x04 \x01(\tR\btoplevel\x12'\n" + 971 + "\x0ftimeout_seconds\x18\x05 \x01(\rR\x0etimeoutSeconds\"g\n" + 972 + "\x14ActivateConfigResult\x12\x1d\n" + 973 + "\n" + 974 + "config_key\x18\x01 \x01(\tR\tconfigKey\x12\x1a\n" + 975 + "\btoplevel\x18\x02 \x01(\tR\btoplevel\x12\x14\n" + 976 + "\x05error\x18\x03 \x01(\tR\x05error\":\n" + 977 + "\n" + 978 + "BuiltPaths\x12\x14\n" + 979 + "\x05paths\x18\x01 \x03(\tR\x05paths\x12\x16\n" + 980 + "\x06reason\x18\x02 \x01(\tR\x06reason\"5\n" + 981 + "\n" + 982 + "CacheDrain\x12'\n" + 983 + "\x0ftimeout_seconds\x18\x01 \x01(\rR\x0etimeoutSeconds\"\xb8\x01\n" + 984 + "\x10CacheDrainResult\x12\x14\n" + 985 + "\x05error\x18\x01 \x01(\tR\x05error\x12!\n" + 986 + "\fcache_queued\x18\x02 \x01(\rR\vcacheQueued\x12!\n" + 987 + "\fcache_active\x18\x03 \x01(\rR\vcacheActive\x12%\n" + 988 + "\x0ecache_uploaded\x18\x04 \x01(\rR\rcacheUploaded\x12!\n" + 989 + "\fcache_failed\x18\x05 \x01(\rR\vcacheFailed\"\n" + 
990 + "\n" + 991 + "\bPoweroff\"&\n" + 992 + "\x0ePoweroffResult\x12\x14\n" + 993 + "\x05error\x18\x01 \x01(\tR\x05error\"\xa8\b\n" + 994 + "\aMessage\x12\x17\n" + 995 + "\x02id\x18\x01 \x01(\tB\a\xbaH\x04r\x02\x10\x01R\x02id\x12-\n" + 996 + "\x05hello\x18\x02 \x01(\v2\x17.spindle.agent.v1.HelloR\x05hello\x12*\n" + 997 + "\x04init\x18\x03 \x01(\v2\x16.spindle.agent.v1.InitR\x04init\x12:\n" + 998 + "\n" + 999 + "exec_start\x18\x04 \x01(\v2\x1b.spindle.agent.v1.ExecStartR\texecStart\x12=\n" + 1000 + "\vexec_stdout\x18\x05 \x01(\v2\x1c.spindle.agent.v1.ExecStdoutR\n" + 1001 + "execStdout\x12=\n" + 1002 + "\vexec_stderr\x18\x06 \x01(\v2\x1c.spindle.agent.v1.ExecStderrR\n" + 1003 + "execStderr\x127\n" + 1004 + "\texec_exit\x18\a \x01(\v2\x1a.spindle.agent.v1.ExecExitR\bexecExit\x12I\n" + 1005 + "\x0factivate_config\x18\b \x01(\v2 .spindle.agent.v1.ActivateConfigR\x0eactivateConfig\x12\\\n" + 1006 + "\x16activate_config_result\x18\t \x01(\v2&.spindle.agent.v1.ActivateConfigResultR\x14activateConfigResult\x12=\n" + 1007 + "\vbuilt_paths\x18\n" + 1008 + " \x01(\v2\x1c.spindle.agent.v1.BuiltPathsR\n" + 1009 + "builtPaths\x12=\n" + 1010 + "\vcache_drain\x18\v \x01(\v2\x1c.spindle.agent.v1.CacheDrainR\n" + 1011 + "cacheDrain\x12P\n" + 1012 + "\x12cache_drain_result\x18\f \x01(\v2\".spindle.agent.v1.CacheDrainResultR\x10cacheDrainResult\x126\n" + 1013 + "\bpoweroff\x18\r \x01(\v2\x1a.spindle.agent.v1.PoweroffR\bpoweroff\x12I\n" + 1014 + "\x0fpoweroff_result\x18\x0e \x01(\v2 .spindle.agent.v1.PoweroffResultR\x0epoweroffResult:\xb9\x01\xbaH\xb5\x01\"\xb2\x01\n" + 1015 + "\x05hello\n" + 1016 + "\x04init\n" + 1017 + "\n" + 1018 + "exec_start\n" + 1019 + "\vexec_stdout\n" + 1020 + "\vexec_stderr\n" + 1021 + "\texec_exit\n" + 1022 + "\x0factivate_config\n" + 1023 + "\x16activate_config_result\n" + 1024 + "\vbuilt_paths\n" + 1025 + "\vcache_drain\n" + 1026 + "\x12cache_drain_result\n" + 1027 + "\bpoweroff\n" + 1028 + 
"\x0fpoweroff_result\x10\x01B1Z/tangled.org/core/spindle/agentproto/gen;agentv1b\x06proto3" 1029 + 1030 + var ( 1031 + file_spindle_agent_v1_agent_proto_rawDescOnce sync.Once 1032 + file_spindle_agent_v1_agent_proto_rawDescData []byte 1033 + ) 1034 + 1035 + func file_spindle_agent_v1_agent_proto_rawDescGZIP() []byte { 1036 + file_spindle_agent_v1_agent_proto_rawDescOnce.Do(func() { 1037 + file_spindle_agent_v1_agent_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_spindle_agent_v1_agent_proto_rawDesc), len(file_spindle_agent_v1_agent_proto_rawDesc))) 1038 + }) 1039 + return file_spindle_agent_v1_agent_proto_rawDescData 1040 + } 1041 + 1042 + var file_spindle_agent_v1_agent_proto_msgTypes = make([]protoimpl.MessageInfo, 14) 1043 + var file_spindle_agent_v1_agent_proto_goTypes = []any{ 1044 + (*Hello)(nil), // 0: spindle.agent.v1.Hello 1045 + (*Init)(nil), // 1: spindle.agent.v1.Init 1046 + (*ExecStart)(nil), // 2: spindle.agent.v1.ExecStart 1047 + (*ExecStdout)(nil), // 3: spindle.agent.v1.ExecStdout 1048 + (*ExecStderr)(nil), // 4: spindle.agent.v1.ExecStderr 1049 + (*ExecExit)(nil), // 5: spindle.agent.v1.ExecExit 1050 + (*ActivateConfig)(nil), // 6: spindle.agent.v1.ActivateConfig 1051 + (*ActivateConfigResult)(nil), // 7: spindle.agent.v1.ActivateConfigResult 1052 + (*BuiltPaths)(nil), // 8: spindle.agent.v1.BuiltPaths 1053 + (*CacheDrain)(nil), // 9: spindle.agent.v1.CacheDrain 1054 + (*CacheDrainResult)(nil), // 10: spindle.agent.v1.CacheDrainResult 1055 + (*Poweroff)(nil), // 11: spindle.agent.v1.Poweroff 1056 + (*PoweroffResult)(nil), // 12: spindle.agent.v1.PoweroffResult 1057 + (*Message)(nil), // 13: spindle.agent.v1.Message 1058 + } 1059 + var file_spindle_agent_v1_agent_proto_depIdxs = []int32{ 1060 + 0, // 0: spindle.agent.v1.Message.hello:type_name -> spindle.agent.v1.Hello 1061 + 1, // 1: spindle.agent.v1.Message.init:type_name -> spindle.agent.v1.Init 1062 + 2, // 2: spindle.agent.v1.Message.exec_start:type_name -> 
spindle.agent.v1.ExecStart 1063 + 3, // 3: spindle.agent.v1.Message.exec_stdout:type_name -> spindle.agent.v1.ExecStdout 1064 + 4, // 4: spindle.agent.v1.Message.exec_stderr:type_name -> spindle.agent.v1.ExecStderr 1065 + 5, // 5: spindle.agent.v1.Message.exec_exit:type_name -> spindle.agent.v1.ExecExit 1066 + 6, // 6: spindle.agent.v1.Message.activate_config:type_name -> spindle.agent.v1.ActivateConfig 1067 + 7, // 7: spindle.agent.v1.Message.activate_config_result:type_name -> spindle.agent.v1.ActivateConfigResult 1068 + 8, // 8: spindle.agent.v1.Message.built_paths:type_name -> spindle.agent.v1.BuiltPaths 1069 + 9, // 9: spindle.agent.v1.Message.cache_drain:type_name -> spindle.agent.v1.CacheDrain 1070 + 10, // 10: spindle.agent.v1.Message.cache_drain_result:type_name -> spindle.agent.v1.CacheDrainResult 1071 + 11, // 11: spindle.agent.v1.Message.poweroff:type_name -> spindle.agent.v1.Poweroff 1072 + 12, // 12: spindle.agent.v1.Message.poweroff_result:type_name -> spindle.agent.v1.PoweroffResult 1073 + 13, // [13:13] is the sub-list for method output_type 1074 + 13, // [13:13] is the sub-list for method input_type 1075 + 13, // [13:13] is the sub-list for extension type_name 1076 + 13, // [13:13] is the sub-list for extension extendee 1077 + 0, // [0:13] is the sub-list for field type_name 1078 + } 1079 + 1080 + func init() { file_spindle_agent_v1_agent_proto_init() } 1081 + func file_spindle_agent_v1_agent_proto_init() { 1082 + if File_spindle_agent_v1_agent_proto != nil { 1083 + return 1084 + } 1085 + type x struct{} 1086 + out := protoimpl.TypeBuilder{ 1087 + File: protoimpl.DescBuilder{ 1088 + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), 1089 + RawDescriptor: unsafe.Slice(unsafe.StringData(file_spindle_agent_v1_agent_proto_rawDesc), len(file_spindle_agent_v1_agent_proto_rawDesc)), 1090 + NumEnums: 0, 1091 + NumMessages: 14, 1092 + NumExtensions: 0, 1093 + NumServices: 0, 1094 + }, 1095 + GoTypes: file_spindle_agent_v1_agent_proto_goTypes, 1096 + 
DependencyIndexes: file_spindle_agent_v1_agent_proto_depIdxs, 1097 + MessageInfos: file_spindle_agent_v1_agent_proto_msgTypes, 1098 + }.Build() 1099 + File_spindle_agent_v1_agent_proto = out.File 1100 + file_spindle_agent_v1_agent_proto_goTypes = nil 1101 + file_spindle_agent_v1_agent_proto_depIdxs = nil 1102 + }
+98
spindle/agentproto/protocol.go
··· 1 + package agentproto 2 + 3 + import ( 4 + "encoding/binary" 5 + "fmt" 6 + "io" 7 + "sync" 8 + 9 + "google.golang.org/protobuf/proto" 10 + 11 + "buf.build/go/protovalidate" 12 + agentv1 "tangled.org/core/spindle/agentproto/gen" 13 + ) 14 + 15 + const ( 16 + ProtocolVersion = 1 17 + DefaultPort = 10240 18 + MaxMessageBytes = 1024 * 1024 19 + ) 20 + 21 + type Message = agentv1.Message 22 + 23 + var validator protovalidate.Validator 24 + 25 + func init() { 26 + var err error 27 + validator, err = protovalidate.New() 28 + if err != nil { 29 + panic(fmt.Errorf("failed to initialize protovalidate validator: %w", err)) 30 + } 31 + } 32 + 33 + type Encoder struct { 34 + mu sync.Mutex 35 + w io.Writer 36 + } 37 + 38 + func NewEncoder(w io.Writer) *Encoder { 39 + return &Encoder{w: w} 40 + } 41 + 42 + func (e *Encoder) Encode(msg *Message) error { 43 + if err := validator.Validate(msg); err != nil { 44 + return fmt.Errorf("validate agent message: %w", err) 45 + } 46 + 47 + data, err := proto.Marshal(msg) 48 + if err != nil { 49 + return fmt.Errorf("marshal agent message: %w", err) 50 + } 51 + if len(data) > MaxMessageBytes { 52 + return fmt.Errorf("agent message exceeded %d bytes", MaxMessageBytes) 53 + } 54 + 55 + var header [4]byte 56 + binary.BigEndian.PutUint32(header[:], uint32(len(data))) 57 + 58 + e.mu.Lock() 59 + defer e.mu.Unlock() 60 + if _, err := e.w.Write(header[:]); err != nil { 61 + return err 62 + } 63 + _, err = e.w.Write(data) 64 + return err 65 + } 66 + 67 + type Decoder struct { 68 + r io.Reader 69 + } 70 + 71 + func NewDecoder(r io.Reader) *Decoder { 72 + return &Decoder{r: r} 73 + } 74 + 75 + func (d *Decoder) Decode() (*Message, error) { 76 + msg := &Message{} 77 + var header [4]byte 78 + if _, err := io.ReadFull(d.r, header[:]); err != nil { 79 + return msg, err 80 + } 81 + 82 + size := binary.BigEndian.Uint32(header[:]) 83 + if size > MaxMessageBytes { 84 + return msg, fmt.Errorf("agent message exceeded %d bytes", MaxMessageBytes) 85 + } 86 + 87 
+ data := make([]byte, size) 88 + if _, err := io.ReadFull(d.r, data); err != nil { 89 + return msg, err 90 + } 91 + if err := proto.Unmarshal(data, msg); err != nil { 92 + return msg, fmt.Errorf("parse agent message: %w", err) 93 + } 94 + if err := validator.Validate(msg); err != nil { 95 + return msg, fmt.Errorf("validate agent message: %w", err) 96 + } 97 + return msg, nil 98 + }
+57
spindle/agentproto/protocol_test.go
··· 1 + package agentproto 2 + 3 + import ( 4 + "bytes" 5 + "encoding/binary" 6 + "testing" 7 + 8 + agentv1 "tangled.org/core/spindle/agentproto/gen" 9 + ) 10 + 11 + func TestDecoderRejectsOversizedMessage(t *testing.T) { 12 + var tooLarge bytes.Buffer 13 + var header [4]byte 14 + binary.BigEndian.PutUint32(header[:], MaxMessageBytes+1) 15 + tooLarge.Write(header[:]) 16 + 17 + _, err := NewDecoder(&tooLarge).Decode() 18 + if err == nil { 19 + t.Fatal("expected oversized message error") 20 + } 21 + } 22 + 23 + func TestValidation(t *testing.T) { 24 + // 1. Valid message (exactly one of the payload fields is set) 25 + validMsg := &Message{ 26 + Id: "test-1", 27 + Hello: &agentv1.Hello{ 28 + ProtocolVersion: 1, 29 + AgentVersion: "1.0", 30 + }, 31 + } 32 + if err := validator.Validate(validMsg); err != nil { 33 + t.Fatalf("expected valid message to pass validation, got: %v", err) 34 + } 35 + 36 + // 2. Invalid message: zero payloads set 37 + invalidZeroMsg := &Message{ 38 + Id: "test-2", 39 + } 40 + if err := validator.Validate(invalidZeroMsg); err == nil { 41 + t.Fatal("expected message with zero payloads to fail validation") 42 + } 43 + 44 + // 3. Invalid message: multiple payloads set 45 + invalidMultiMsg := &Message{ 46 + Id: "test-3", 47 + Hello: &agentv1.Hello{ 48 + ProtocolVersion: 1, 49 + }, 50 + Init: &agentv1.Init{ 51 + JobId: "job-1", 52 + }, 53 + } 54 + if err := validator.Validate(invalidMultiMsg); err == nil { 55 + t.Fatal("expected message with multiple payloads to fail validation") 56 + } 57 + }
+110
spindle/agentproto/spindle/agent/v1/agent.proto
syntax = "proto3";

package spindle.agent.v1;

import "buf/validate/validate.proto";

option go_package = "tangled.org/core/spindle/agentproto/gen;agentv1";

// Hello carries version and identity information about an agent.
// NOTE(review): presumably the first message a guest agent sends after
// connecting; confirm against the agent implementation.
message Hello {
  uint32 protocol_version = 1;
  string agent_version = 2;
  string boot_id = 3;
  string nix_version = 4;
}

// Init carries per-job settings: the job id, trusted cache public keys and the
// ports of the cache read, cache upload and DNS proxies.
message Init {
  string job_id = 1;
  repeated string cache_trusted_public_keys = 2;
  uint32 cache_read_proxy_port = 3;
  uint32 cache_upload_proxy_port = 4;
  uint32 dns_proxy_port = 5;
}

// ExecStart describes a command to execute: argument vector, environment,
// working directory, user and a timeout in seconds.
// NOTE(review): the format of env entries (e.g. KEY=VALUE) and the meaning of
// timeout_seconds = 0 are not visible here; confirm.
message ExecStart {
  repeated string argv = 1;
  repeated string env = 2;
  string cwd = 3;
  string user = 4;
  uint32 timeout_seconds = 5;
}

// ExecStdout carries a chunk of a command's standard output.
message ExecStdout {
  string data = 1;
}

// ExecStderr carries a chunk of a command's standard error.
message ExecStderr {
  string data = 1;
}

// ExecExit reports how a command ended.
message ExecExit {
  int32 exit_code = 1;
  string error = 2;
  // set when the guest killed the step on its own timeout timer, so the host
  // can classify it as a timeout rather than inferring failure from exit_code.
  bool timed_out = 3;
}

// ActivateConfig requests activation of a configuration, identified by
// config_key, with an optional pre-built toplevel path.
// NOTE(review): presumably toplevel is a Nix store path and user_config is the
// workflow-level NixOS configuration; confirm.
message ActivateConfig {
  string config_key = 1;
  string base_config_hash = 2;
  string user_config = 3;
  string toplevel = 4;
  uint32 timeout_seconds = 5;
}

// ActivateConfigResult reports the outcome of an ActivateConfig request.
// NOTE(review): an empty error presumably means success; confirm.
message ActivateConfigResult {
  string config_key = 1;
  string toplevel = 2;
  string error = 3;
}

// BuiltPaths lists paths together with a free-form reason.
message BuiltPaths {
  repeated string paths = 1;
  string reason = 2;
}

// CacheDrain asks for pending cache work to be drained within a timeout.
message CacheDrain {
  uint32 timeout_seconds = 1;
}

// CacheDrainResult reports the outcome of a CacheDrain request along with
// counters for queued, active, uploaded and failed cache entries.
message CacheDrainResult {
  string error = 1;
  uint32 cache_queued = 2;
  uint32 cache_active = 3;
  uint32 cache_uploaded = 4;
  uint32 cache_failed = 5;
}

// Poweroff requests a power-off; it carries no fields.
message Poweroff {}

// PoweroffResult reports the outcome of a Poweroff request.
message PoweroffResult {
  string error = 1;
}

// Message is the envelope for every frame of the agent protocol. It carries a
// non-empty id and a payload; the validation rule below requires one of the
// listed payload fields to be set.
message Message {
  option (buf.validate.message).oneof = {
    fields: [
      "hello", "init", "exec_start", "exec_stdout", "exec_stderr", "exec_exit",
      "activate_config", "activate_config_result", "built_paths", "cache_drain",
      "cache_drain_result", "poweroff", "poweroff_result"
    ],
    required: true
  };

  // id must be at least one character long.
  string id = 1 [(buf.validate.field).string.min_len = 1];

  Hello hello = 2;
  Init init = 3;
  ExecStart exec_start = 4;
  ExecStdout exec_stdout = 5;
  ExecStderr exec_stderr = 6;
  ExecExit exec_exit = 7;
  ActivateConfig activate_config = 8;
  ActivateConfigResult activate_config_result = 9;
  BuiltPaths built_paths = 10;
  CacheDrain cache_drain = 11;
  CacheDrainResult cache_drain_result = 12;
  Poweroff poweroff = 13;
  PoweroffResult poweroff_result = 14;
}
+56 -20
spindle/config/config.go
··· 2 2 3 3 import ( 4 4 "context" 5 + "time" 5 6 6 7 "github.com/bluesky-social/indigo/atproto/syntax" 7 8 "github.com/sethvargo/go-envconfig" ··· 9 10 ) 10 11 11 12 type Server struct { 12 - ListenAddr string `env:"LISTEN_ADDR, default=0.0.0.0:6555"` 13 - DBPath string `env:"DB_PATH, default=spindle.db"` 14 - Hostname string `env:"HOSTNAME, required"` 15 - JetstreamEndpoint string `env:"JETSTREAM_ENDPOINT, default=wss://jetstream1.us-west.bsky.network/subscribe"` 16 - Tap Tap `env:",prefix=TAP_"` 17 - PlcUrl string `env:"PLC_URL, default=https://plc.directory"` 18 - Dev bool `env:"DEV, default=false"` 19 - Owner string `env:"OWNER, required"` 20 - Secrets Secrets `env:",prefix=SECRETS_"` 21 - LogDir string `env:"LOG_DIR, default=/var/log/spindle"` 22 - QueueSize int `env:"QUEUE_SIZE, default=100"` 23 - MaxJobCount int `env:"MAX_JOB_COUNT, default=2"` // max number of pipelines that run at a time 24 - MaxConcurrentWorkflows int `env:"MAX_CONCURRENT_WORKFLOWS, default=8"` // max number of workflow containers running at once (memory cap) 25 - DockerSocket string `env:"DOCKER_SOCKET"` // path to a docker socket to expose to workflow containers 13 + ListenAddr string `env:"LISTEN_ADDR, default=0.0.0.0:6555"` 14 + DBPath string `env:"DB_PATH, default=spindle.db"` 15 + Hostname string `env:"HOSTNAME, required"` 16 + JetstreamEndpoint string `env:"JETSTREAM_ENDPOINT, default=wss://jetstream1.us-west.bsky.network/subscribe"` 17 + Tap Tap `env:",prefix=TAP_"` 18 + PlcUrl string `env:"PLC_URL, default=https://plc.directory"` 19 + Dev bool `env:"DEV, default=false"` 20 + DevExtraHosts []string `env:"DEV_EXTRA_HOSTS"` 21 + Owner string `env:"OWNER, required"` 22 + Secrets Secrets `env:",prefix=SECRETS_"` 23 + LogDir string `env:"LOG_DIR, default=/var/log/spindle"` 24 + QueueSize int `env:"QUEUE_SIZE, default=100"` 25 + MaxJobCount int `env:"MAX_JOB_COUNT, default=2"` // max number of pipelines that run at a time 26 + DockerSocket string `env:"DOCKER_SOCKET"` // path to a 
docker socket to expose to workflow containers 26 27 } 27 28 28 29 type Tap struct { ··· 49 50 } 50 51 51 52 type NixeryPipelines struct { 52 - Nixery string `env:"NIXERY, default=nixery.tangled.sh"` 53 - WorkflowTimeout string `env:"WORKFLOW_TIMEOUT, default=5m"` 54 - MaxJobMemoryMB int64 `env:"MAX_JOB_MEMORY_MB, default=6144"` // per-container memory limit in MiB (default 6 GiB) 53 + Nixery string `env:"NIXERY, default=nixery.tangled.sh"` 54 + WorkflowTimeout string `env:"WORKFLOW_TIMEOUT, default=5m"` 55 + MaxJobMemoryMB int64 `env:"MAX_JOB_MEMORY_MB, default=6144"` // per-container memory limit in MiB (default 6 GiB) 56 + MaxConcurrentWorkflows int `env:"MAX_CONCURRENT_WORKFLOWS, default=8"` // max number of workflow containers running at once (memory cap) 55 57 } 56 58 57 59 type S3 struct { 58 60 LogBucket string `env:"LOG_BUCKET"` 59 61 } 60 62 63 + type MicroVMPipelines struct { 64 + ImageDir string `env:"IMAGE_DIR, required"` 65 + OverlayDir string `env:"OVERLAY_DIR, default="` // where microVM temporary disks will live 66 + DefaultImage string `env:"DEFAULT_IMAGE, default=nixos-x86_64"` 67 + AgentPort uint32 `env:"AGENT_PORT, default=10240"` 68 + EnableKVM bool `env:"ENABLE_KVM, default=true"` 69 + WorkflowTimeout string `env:"WORKFLOW_TIMEOUT, default=5m"` 70 + 71 + MaxTotalMemoryMiB int64 `env:"MAX_TOTAL_MEMORY_MIB, default=0"` 72 + MaxTotalVCPUs int64 `env:"MAX_TOTAL_VCPUS, default=0"` 73 + MaxTotalDiskMiB int64 `env:"MAX_TOTAL_DISK_MIB, default=0"` 74 + 75 + MaxWorkflowMemoryMiB int64 `env:"MAX_WORKFLOW_MEMORY_MIB, default=0"` 76 + MaxWorkflowVCPUs int64 `env:"MAX_WORKFLOW_VCPUS, default=0"` 77 + MaxWorkflowDiskMiB int64 `env:"MAX_WORKFLOW_DISK_MIB, default=0"` 78 + 79 + AgingThreshold time.Duration `env:"AGING_THRESHOLD, default=30s"` 80 + 81 + EnableCgroups bool `env:"ENABLE_CGROUPS, default=false"` 82 + CgroupParent string `env:"CGROUP_PARENT, default=self"` 83 + CgroupPidsMax int64 `env:"CGROUP_PIDS_MAX, default=4096"` 84 + CgroupSwapMaxMiB *int64 
`env:"CGROUP_SWAP_MAX_MIB"` 85 + // memory.min that will get assigned to the supervisor (spindle itself) cgroup 86 + CgroupSupervisorMemoryMinMiB int64 `env:"CGROUP_SUPERVISOR_MEMORY_MIN_MIB, default=512"` 87 + } 88 + 89 + type NixCache struct { 90 + ReadURLs []string `env:"READ_URLS"` 91 + TrustedPublicKeys []string `env:"TRUSTED_PUBLIC_KEYS"` 92 + UploadURL string `env:"UPLOAD_URL"` 93 + } 94 + 61 95 type Config struct { 62 - Server Server `env:",prefix=SPINDLE_SERVER_"` 63 - NixeryPipelines NixeryPipelines `env:",prefix=SPINDLE_NIXERY_PIPELINES_"` 64 - S3 S3 `env:",prefix=SPINDLE_S3_"` 96 + Server Server `env:",prefix=SPINDLE_SERVER_"` 97 + NixeryPipelines NixeryPipelines `env:",prefix=SPINDLE_NIXERY_PIPELINES_"` 98 + MicroVMPipelines MicroVMPipelines `env:",prefix=SPINDLE_MICROVM_PIPELINES_"` 99 + NixCache NixCache `env:",prefix=SPINDLE_NIX_CACHE_"` 100 + S3 S3 `env:",prefix=SPINDLE_S3_"` 65 101 } 66 102 67 103 func Load(ctx context.Context) (*Config, error) {
+6
spindle/db/db.go
··· 101 101 created integer not null -- unix nanos 102 102 ); 103 103 104 + create table if not exists nixos_toplevel_cache ( 105 + config_key text primary key, 106 + toplevel text not null, 107 + updated_at text not null 108 + ); 109 + 104 110 create table if not exists migrations ( 105 111 id integer primary key autoincrement, 106 112 name text unique
+41
spindle/db/nixos_toplevel_cache.go
··· 1 + package db 2 + 3 + import ( 4 + "time" 5 + ) 6 + 7 + type NixOSToplevelCacheRecord struct { 8 + ConfigKey string 9 + Toplevel string 10 + UpdatedAt time.Time 11 + } 12 + 13 + func (d *DB) GetNixOSToplevelCacheRecord(configKey string) (*NixOSToplevelCacheRecord, error) { 14 + var record NixOSToplevelCacheRecord 15 + var updatedAtStr string 16 + err := d.QueryRow( 17 + `select config_key, toplevel, updated_at from nixos_toplevel_cache where config_key = ?`, 18 + configKey, 19 + ).Scan(&record.ConfigKey, &record.Toplevel, &updatedAtStr) 20 + if err != nil { 21 + return nil, err 22 + } 23 + updatedAt, err := time.Parse(time.RFC3339, updatedAtStr) 24 + if err != nil { 25 + return nil, err 26 + } 27 + record.UpdatedAt = updatedAt 28 + return &record, nil 29 + } 30 + 31 + func (d *DB) SaveNixOSToplevelCacheRecord(configKey, toplevel string) error { 32 + _, err := d.Exec( 33 + `insert into nixos_toplevel_cache (config_key, toplevel, updated_at) 34 + values (?, ?, ?) 35 + on conflict(config_key) do update set 36 + toplevel = excluded.toplevel, 37 + updated_at = excluded.updated_at`, 38 + configKey, toplevel, time.Now().UTC().Format(time.RFC3339), 39 + ) 40 + return err 41 + }
+31 -5
spindle/engine/engine.go
··· 20 20 ErrWorkflowFailed = errors.New("workflow failed") 21 21 ) 22 22 23 - func StartWorkflows(l *slog.Logger, vault secrets.Manager, cfg *config.Config, db *db.DB, n *notifier.Notifier, workflowSem chan struct{}, ctx context.Context, pipeline *models.Pipeline, pipelineId models.PipelineId) { 23 + type workflowFinalizer interface { 24 + FinalizeWorkflow(ctx context.Context, wid models.WorkflowId, wf *models.Workflow, wfLogger models.WorkflowLogger) error 25 + } 26 + 27 + func StartWorkflows(l *slog.Logger, vault secrets.Manager, cfg *config.Config, db *db.DB, n *notifier.Notifier, ctx context.Context, pipeline *models.Pipeline, pipelineId models.PipelineId) { 24 28 l.Info("starting all workflows in parallel", "pipeline", pipelineId) 25 29 26 30 // extract secrets ··· 74 78 defer wfLogger.Close() 75 79 } 76 80 81 + l.Info("waiting for slot", "wid", wid) 82 + slot := WorkflowSlot(NoopSlot{}) 83 + if s, ok := eng.(WorkflowSlotter); ok { 84 + var err error 85 + slot, err = s.AcquireWorkflowSlot(ctx, wid, &w) 86 + if err != nil { 87 + l.Error("failed to acquire slot", "wid", wid, "err", err) 88 + dbErr := db.StatusFailed(wid, err.Error(), -1, n) 89 + if dbErr != nil { 90 + l.Error("failed to set workflow status to failed", "wid", wid, "err", dbErr) 91 + } 92 + return 93 + } 94 + } 95 + defer slot.Release() 96 + 77 97 err = db.StatusRunning(wid, n) 78 98 if err != nil { 79 99 l.Error("failed to set workflow status to running", "wid", wid, "err", err) 80 100 return 81 101 } 82 - 83 - // acquire semaphore slot before starting the container 84 - workflowSem <- struct{}{} 85 - defer func() { <-workflowSem }() 86 102 87 103 err = eng.SetupWorkflow(ctx, wid, &w, wfLogger) 88 104 if err != nil { ··· 134 150 if dbErr != nil { 135 151 l.Error("failed to set workflow status to failed", "wid", wid, "err", dbErr) 136 152 } 153 + } 154 + return 155 + } 156 + } 157 + 158 + if finalizer, ok := eng.(workflowFinalizer); ok { 159 + if err := finalizer.FinalizeWorkflow(ctx, wid, &w, 
wfLogger); err != nil { 160 + dbErr := db.StatusFailed(wid, err.Error(), -1, n) 161 + if dbErr != nil { 162 + l.Error("failed to set workflow status to failed", "wid", wid, "err", dbErr) 137 163 } 138 164 return 139 165 }
+139
spindle/engine/scheduler.go
package engine

import (
	"context"
	"fmt"
	"slices"
	"sync"
	"time"
)

// defaultAgingThreshold is the aging threshold NewResourceScheduler falls back
// to when given a non-positive value.
const defaultAgingThreshold = 30 * time.Second

// Resources is the constraint for a resource vector the scheduler can account
// for. The scheduler calls r.Fits(limit) to ask whether r is within limit, and
// Add/Sub to accumulate and give back usage.
// NOTE(review): how an implementation treats zero-valued limits (e.g. as
// "unlimited") is up to the implementation; confirm per type.
type Resources[Self any] interface {
	Fits(Self) bool
	Add(Self) Self
	Sub(Self) Self
}

// ResourceScheduler hands out leases for resource requests against a total
// budget and a per-request maximum, queueing requests that do not currently
// fit. All fields are guarded by mu.
type ResourceScheduler[R Resources[R]] struct {
	mu             sync.Mutex
	budget         R                    // total that all granted requests together must fit in
	max            R                    // limit a single request must fit in
	used           R                    // sum of currently granted requests
	queue          []*resourceWaiter[R] // waiting requests, oldest first
	now            func() time.Time     // get time now, is a field for mocking
	agingThreshold time.Duration        // queue age after which a waiter reserves capacity
}

// resourceWaiter is one queued request. ready is closed by schedule once the
// request has been granted and its resources added to used.
type resourceWaiter[R Resources[R]] struct {
	req        R
	ready      chan struct{}
	enqueuedAt time.Time
}

// resourceLease is the WorkflowSlot returned for a granted request; once makes
// Release give the resources back at most one time.
type resourceLease[R Resources[R]] struct {
	scheduler *ResourceScheduler[R]
	req       R
	once      sync.Once
}

// NewResourceScheduler returns a scheduler with the given total budget and
// per-request maximum. A non-positive agingThreshold is replaced by
// defaultAgingThreshold.
func NewResourceScheduler[R Resources[R]](budget, max R, agingThreshold time.Duration) *ResourceScheduler[R] {
	if agingThreshold <= 0 {
		agingThreshold = defaultAgingThreshold
	}
	return &ResourceScheduler[R]{
		budget:         budget,
		max:            max,
		now:            time.Now,
		agingThreshold: agingThreshold,
	}
}

// Acquire blocks until req has been granted or ctx is done.
//
// A nil scheduler grants a NoopSlot immediately. A request that does not fit
// the budget or the per-request maximum can never be satisfied and is rejected
// at once with an error wrapping ErrNoWorkflowSlots. Otherwise the request is
// granted immediately when nothing is queued and it fits next to current
// usage; if not, it is queued and the caller waits. When ctx ends first,
// Acquire returns ctx.Err() and no resources remain held for this request.
func (s *ResourceScheduler[R]) Acquire(ctx context.Context, req R) (WorkflowSlot, error) {
	if s == nil {
		return NoopSlot{}, nil
	}

	s.mu.Lock()
	if !req.Fits(s.budget) || !req.Fits(s.max) {
		s.mu.Unlock()
		return nil, fmt.Errorf("%w: request=%v budget=%v max=%v", ErrNoWorkflowSlots, req, s.budget, s.max)
	}
	// fast path: only taken when the queue is empty, so an arriving request
	// never bypasses schedule()'s aging logic for existing waiters.
	if len(s.queue) == 0 && s.used.Add(req).Fits(s.budget) {
		s.used = s.used.Add(req)
		s.mu.Unlock()
		return &resourceLease[R]{scheduler: s, req: req}, nil
	}

	waiter := &resourceWaiter[R]{req: req, ready: make(chan struct{}), enqueuedAt: s.now()}
	s.queue = append(s.queue, waiter)
	// the new waiter may be startable right away (backfill past blocked waiters)
	s.schedule()
	s.mu.Unlock()

	select {
	case <-waiter.ready:
		return &resourceLease[R]{scheduler: s, req: req}, nil
	case <-ctx.Done():
		s.mu.Lock()
		// re-check under the lock: the grant and the cancellation can race
		select {
		case <-waiter.ready:
			// schedule already granted this waiter and added req to used;
			// give it back since the caller will not receive a lease
			s.used = s.used.Sub(req)
		default:
			// still in queue, just remove
			s.remove(waiter)
		}
		// freed capacity or a removed reservation may unblock other waiters
		s.schedule()
		s.mu.Unlock()
		return nil, ctx.Err()
	}
}

// Release returns the leased resources to the scheduler. It is safe to call on
// a nil lease and safe to call more than once; only the first call has an
// effect.
func (l *resourceLease[R]) Release() {
	if l == nil || l.scheduler == nil {
		return
	}
	l.once.Do(func() {
		l.scheduler.release(l.req)
	})
}

// release subtracts req from the current usage and starts any waiters that now
// fit.
func (s *ResourceScheduler[R]) release(req R) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.used = s.used.Sub(req)
	s.schedule()
}

// start every waiter whose request fits. once a waiter is older than
// agingThreshold, count its request as already used so younger waiters
// stop being scheduled ahead of it.
//
// every caller in this file holds s.mu while calling schedule.
func (s *ResourceScheduler[R]) schedule() {
	var reserved R
	now := s.now()
	i := 0
	for i < len(s.queue) {
		w := s.queue[i]
		if s.used.Add(reserved).Add(w.req).Fits(s.budget) {
			// grant: the deletion shifts the next waiter into index i, so i
			// is deliberately not advanced
			s.queue = slices.Delete(s.queue, i, i+1)
			s.used = s.used.Add(w.req)
			close(w.ready)
			continue
		}
		if now.Sub(w.enqueuedAt) >= s.agingThreshold {
			reserved = reserved.Add(w.req)
		}
		i++
	}
}

// remove deletes waiter from the queue if it is present; it does nothing
// otherwise. Acquire calls it with s.mu held.
func (s *ResourceScheduler[R]) remove(waiter *resourceWaiter[R]) {
	for i, candidate := range s.queue {
		if candidate != waiter {
			continue
		}
		s.queue = slices.Delete(s.queue, i, i+1)
		return
	}
}
+200
spindle/engine/scheduler_test.go
··· 1 + package engine 2 + 3 + import ( 4 + "context" 5 + "errors" 6 + "fmt" 7 + "testing" 8 + "time" 9 + ) 10 + 11 + // resources for testing 12 + type ru struct{ a, b int64 } 13 + 14 + func (r ru) Fits(limit ru) bool { 15 + if limit.a > 0 && r.a > limit.a { 16 + return false 17 + } 18 + if limit.b > 0 && r.b > limit.b { 19 + return false 20 + } 21 + return true 22 + } 23 + func (r ru) Add(o ru) ru { return ru{r.a + o.a, r.b + o.b} } 24 + func (r ru) Sub(o ru) ru { return ru{max(0, r.a-o.a), max(0, r.b-o.b)} } 25 + func (r ru) String() string { 26 + return fmt.Sprintf("a=%d b=%d", r.a, r.b) 27 + } 28 + 29 + type acquireResult struct { 30 + slot WorkflowSlot 31 + err error 32 + } 33 + 34 + func TestResourceSchedulerZeroLimitsDoNotApply(t *testing.T) { 35 + t.Parallel() 36 + 37 + scheduler := NewResourceScheduler(ru{}, ru{}, 0) 38 + 39 + slot, err := scheduler.Acquire(context.Background(), ru{a: 1 << 20, b: 1 << 20}) 40 + if err != nil { 41 + t.Fatalf("Acquire() error = %v", err) 42 + } 43 + slot.Release() 44 + } 45 + 46 + func TestResourceSchedulerRejectsRequestsThatCanNeverFit(t *testing.T) { 47 + t.Parallel() 48 + 49 + scheduler := NewResourceScheduler(ru{a: 1024, b: 10_000}, ru{a: 512, b: 5_000}, 0) 50 + 51 + _, err := scheduler.Acquire(context.Background(), ru{a: 768, b: 100}) 52 + if !errors.Is(err, ErrNoWorkflowSlots) { 53 + t.Fatalf("Acquire() error = %v, want ErrNoWorkflowSlots", err) 54 + } 55 + 56 + _, err = scheduler.Acquire(context.Background(), ru{a: 128, b: 12_000}) 57 + if !errors.Is(err, ErrNoWorkflowSlots) { 58 + t.Fatalf("Acquire() error = %v, want ErrNoWorkflowSlots", err) 59 + } 60 + } 61 + 62 + func TestResourceSchedulerWaitsUntilResourcesAreReleased(t *testing.T) { 63 + t.Parallel() 64 + 65 + scheduler := NewResourceScheduler(ru{a: 1024}, ru{}, 0) 66 + 67 + first, err := scheduler.Acquire(context.Background(), ru{a: 1024}) 68 + if err != nil { 69 + t.Fatalf("first Acquire() error = %v", err) 70 + } 71 + defer first.Release() 72 + 73 + ch := 
acquireAsync(context.Background(), scheduler, ru{a: 1}) 74 + assertAcquireBlocked(t, ch) 75 + 76 + first.Release() 77 + first = NoopSlot{} 78 + 79 + second := waitAcquireOK(t, ch) 80 + second.Release() 81 + } 82 + 83 + func TestResourceSchedulerReleaseIsIdempotent(t *testing.T) { 84 + t.Parallel() 85 + 86 + scheduler := NewResourceScheduler(ru{a: 1}, ru{}, 0) 87 + 88 + slot, err := scheduler.Acquire(context.Background(), ru{a: 1}) 89 + if err != nil { 90 + t.Fatalf("Acquire() error = %v", err) 91 + } 92 + 93 + slot.Release() 94 + slot.Release() 95 + 96 + second, err := scheduler.Acquire(context.Background(), ru{a: 1}) 97 + if err != nil { 98 + t.Fatalf("Acquire() after double release error = %v", err) 99 + } 100 + second.Release() 101 + } 102 + 103 + func TestResourceSchedulerBackfillsPastBlockedHead(t *testing.T) { 104 + t.Parallel() 105 + 106 + scheduler := NewResourceScheduler(ru{a: 1024}, ru{}, time.Hour) // disable aging so we test pure backfill 107 + 108 + hold, err := scheduler.Acquire(context.Background(), ru{a: 512}) 109 + if err != nil { 110 + t.Fatalf("hold Acquire() error = %v", err) 111 + } 112 + defer hold.Release() 113 + 114 + bigCh := acquireAsync(context.Background(), scheduler, ru{a: 768}) 115 + assertAcquireBlocked(t, bigCh) 116 + 117 + smallCh := acquireAsync(context.Background(), scheduler, ru{a: 256}) 118 + small := waitAcquireOK(t, smallCh) 119 + small.Release() 120 + 121 + assertAcquireBlocked(t, bigCh) 122 + } 123 + 124 + func TestResourceSchedulerAgingReservesCapacityForBlockedHead(t *testing.T) { 125 + t.Parallel() 126 + 127 + scheduler := NewResourceScheduler(ru{a: 1024}, ru{}, 10*time.Millisecond) 128 + fakeNow := time.Now() 129 + scheduler.now = func() time.Time { return fakeNow } 130 + 131 + hold, err := scheduler.Acquire(context.Background(), ru{a: 512}) 132 + if err != nil { 133 + t.Fatalf("hold Acquire() error = %v", err) 134 + } 135 + 136 + bigCh := acquireAsync(context.Background(), scheduler, ru{a: 768}) 137 + 
assertAcquireBlocked(t, bigCh) 138 + 139 + fakeNow = fakeNow.Add(time.Second) 140 + 141 + // big is now aged and reserves its 768. a 256 request would fit 142 + // alongside the held 512, but the reservation blocks it. 143 + smallCh := acquireAsync(context.Background(), scheduler, ru{a: 256}) 144 + assertAcquireBlocked(t, smallCh) 145 + 146 + hold.Release() 147 + 148 + big := waitAcquireOK(t, bigCh) 149 + small := waitAcquireOK(t, smallCh) 150 + small.Release() 151 + big.Release() 152 + } 153 + 154 + func acquireAsync(ctx context.Context, scheduler *ResourceScheduler[ru], req ru) <-chan acquireResult { 155 + ch := make(chan acquireResult, 1) 156 + go func() { 157 + slot, err := scheduler.Acquire(ctx, req) 158 + ch <- acquireResult{slot: slot, err: err} 159 + }() 160 + return ch 161 + } 162 + 163 + func assertAcquireBlocked(t *testing.T, ch <-chan acquireResult) { 164 + t.Helper() 165 + 166 + select { 167 + case res := <-ch: 168 + if res.slot != nil { 169 + res.slot.Release() 170 + } 171 + t.Fatalf("Acquire() returned before resources were available: err=%v", res.err) 172 + case <-time.After(25 * time.Millisecond): 173 + } 174 + } 175 + 176 + func waitAcquireOK(t *testing.T, ch <-chan acquireResult) WorkflowSlot { 177 + t.Helper() 178 + 179 + res := waitAcquireResult(t, ch) 180 + if res.err != nil { 181 + t.Fatalf("Acquire() error = %v", res.err) 182 + } 183 + if res.slot == nil { 184 + t.Fatal("Acquire() returned nil slot") 185 + } 186 + return res.slot 187 + } 188 + 189 + func waitAcquireResult(t *testing.T, ch <-chan acquireResult) acquireResult { 190 + t.Helper() 191 + 192 + select { 193 + case res := <-ch: 194 + return res 195 + case <-time.After(time.Second): 196 + t.Fatal("timed out waiting for Acquire() result") 197 + } 198 + 199 + return acquireResult{} 200 + }
+54
spindle/engine/slot.go
··· 1 + package engine 2 + 3 + import ( 4 + "context" 5 + "errors" 6 + 7 + "tangled.org/core/spindle/models" 8 + ) 9 + 10 + var ErrNoWorkflowSlots = errors.New("no workflow slots available") 11 + 12 + type WorkflowSlot interface { 13 + Release() 14 + } 15 + 16 + type WorkflowSlotter interface { 17 + AcquireWorkflowSlot(ctx context.Context, wid models.WorkflowId, wf *models.Workflow) (WorkflowSlot, error) 18 + } 19 + 20 + type releaseFunc func() 21 + 22 + func (f releaseFunc) Release() { 23 + if f != nil { 24 + f() 25 + } 26 + } 27 + 28 + type NoopSlot struct{} 29 + 30 + func (NoopSlot) Release() {} 31 + 32 + // limit by concurrent workflow count 33 + type SemaphoreSlotter struct { 34 + slots chan struct{} 35 + } 36 + 37 + func NewSemaphoreSlotter(maxConcurrent int) *SemaphoreSlotter { 38 + if maxConcurrent <= 0 { 39 + return &SemaphoreSlotter{} 40 + } 41 + return &SemaphoreSlotter{slots: make(chan struct{}, maxConcurrent)} 42 + } 43 + 44 + func (a *SemaphoreSlotter) AcquireWorkflowSlot(ctx context.Context, wid models.WorkflowId, wf *models.Workflow) (WorkflowSlot, error) { 45 + if a == nil || a.slots == nil { 46 + return NoopSlot{}, nil 47 + } 48 + select { 49 + case a.slots <- struct{}{}: 50 + return releaseFunc(func() { <-a.slots }), nil 51 + case <-ctx.Done(): 52 + return nil, ctx.Err() 53 + } 54 + }
+112
spindle/engine/slot_test.go
··· 1 + package engine 2 + 3 + import ( 4 + "context" 5 + "errors" 6 + "testing" 7 + "time" 8 + 9 + "tangled.org/core/spindle/models" 10 + ) 11 + 12 + func TestSemaphoreSlotterDisabledDoesNotBlock(t *testing.T) { 13 + t.Parallel() 14 + 15 + slotter := NewSemaphoreSlotter(0) 16 + 17 + for range 10 { 18 + slot, err := slotter.AcquireWorkflowSlot(context.Background(), zeroWorkflowID(), nil) 19 + if err != nil { 20 + t.Fatalf("AcquireWorkflowSlot() error = %v", err) 21 + } 22 + slot.Release() 23 + } 24 + } 25 + 26 + func TestSemaphoreSlotterBlocksUntilRelease(t *testing.T) { 27 + t.Parallel() 28 + 29 + slotter := NewSemaphoreSlotter(1) 30 + 31 + first, err := slotter.AcquireWorkflowSlot(context.Background(), zeroWorkflowID(), nil) 32 + if err != nil { 33 + t.Fatalf("first AcquireWorkflowSlot() error = %v", err) 34 + } 35 + releasedFirst := false 36 + defer func() { 37 + if !releasedFirst { 38 + first.Release() 39 + } 40 + }() 41 + 42 + acquired := make(chan WorkflowSlot, 1) 43 + errs := make(chan error, 1) 44 + go func() { 45 + slot, err := slotter.AcquireWorkflowSlot(context.Background(), zeroWorkflowID(), nil) 46 + if err != nil { 47 + errs <- err 48 + return 49 + } 50 + acquired <- slot 51 + }() 52 + 53 + assertNotAcquired(t, acquired, errs) 54 + 55 + first.Release() 56 + releasedFirst = true 57 + 58 + second := waitForSlot(t, acquired, errs) 59 + second.Release() 60 + } 61 + 62 + func TestSemaphoreSlotterHonorsContextCancellation(t *testing.T) { 63 + t.Parallel() 64 + 65 + slotter := NewSemaphoreSlotter(1) 66 + 67 + first, err := slotter.AcquireWorkflowSlot(context.Background(), zeroWorkflowID(), nil) 68 + if err != nil { 69 + t.Fatalf("first AcquireWorkflowSlot() error = %v", err) 70 + } 71 + defer first.Release() 72 + 73 + ctx, cancel := context.WithCancel(context.Background()) 74 + cancel() 75 + 76 + _, err = slotter.AcquireWorkflowSlot(ctx, zeroWorkflowID(), nil) 77 + if !errors.Is(err, context.Canceled) { 78 + t.Fatalf("AcquireWorkflowSlot() error = %v, want 
context.Canceled", err) 79 + } 80 + } 81 + 82 + func assertNotAcquired(t *testing.T, acquired <-chan WorkflowSlot, errs <-chan error) { 83 + t.Helper() 84 + 85 + select { 86 + case slot := <-acquired: 87 + slot.Release() 88 + t.Fatal("AcquireWorkflowSlot() acquired a slot before one was released") 89 + case err := <-errs: 90 + t.Fatalf("AcquireWorkflowSlot() returned unexpected error: %v", err) 91 + case <-time.After(25 * time.Millisecond): 92 + } 93 + } 94 + 95 + func waitForSlot(t *testing.T, acquired <-chan WorkflowSlot, errs <-chan error) WorkflowSlot { 96 + t.Helper() 97 + 98 + select { 99 + case slot := <-acquired: 100 + return slot 101 + case err := <-errs: 102 + t.Fatalf("AcquireWorkflowSlot() returned error: %v", err) 103 + case <-time.After(time.Second): 104 + t.Fatal("timed out waiting for slot acquisition") 105 + } 106 + 107 + return nil 108 + } 109 + 110 + func zeroWorkflowID() models.WorkflowId { 111 + return models.WorkflowId{} 112 + }
+208
spindle/engines/microvm/README.md
··· 1 + # spindle microVM engine 2 + 3 + This document describes the architecture of the microvm engine for spindle. In 4 + short it allows the spindle to spin up microvm guests, and implements a guest 5 + [agent protocol](../../agentproto) for communicating with those guests (via the 6 + [shuttle](../../../shuttle) implementation of that proto). It implements some 7 + fairly simple resource budgeting and optionally sets up cgroups for better 8 + enforcing resource limits, and hardens the VM network access. It has Nix cache 9 + integration for any paths built in the VM, those will get pushed to a Nix cache 10 + by the spindle (if one is configured). The runner is abstracted behind an 11 + interface; right now only the QEMU microVM impl is supported, but others (e.g. 12 + firecracker) can slot in later. 13 + 14 + Currently two kinds of images are supported: 15 + 16 + - NixOS images: these allow configuration such as `dependencies`, `services`, 17 + `virtualisation`, `registry`, `caches` in the workflow file itself. The guest 18 + agent will build (or if it's cached, spindle will send the store path for 19 + realization) and activate it before any workflow steps are run. 20 + - Non-NixOS: this is mainly just Alpine for now, but can be anything else. 21 + Workflow-level configuration like that of NixOS images isn't supported while using these. If 22 + Nix exists inside the image (like in our Alpine image) it will still be able 23 + to make use of the spindle cache. 24 + 25 + (For testing, you can run `bash spindle/engines/microvm/test-spindle-microvm.sh` 26 + from repo root. This tests the Alpine and NixOS images, and features such as whether Docker 27 + works, whether the public internet is reachable, and so on.) 28 + 29 + ## Image builds 30 + 31 + Image builds right now are done via Nix: 32 + 33 + - For NixOS, we use [microvm.nix](https://github.com/microvm-nix/microvm.nix), 34 + and layer our own configs on top, see [here](../../../nix/microvm).
35 + - For Alpine we have a small-ish Nix definition that includes fetching the 36 + kernel, initrd, kernel modules; setting up the init script that configures the 37 + VM proper; copying dependencies (like `nix` or `git`) into a rootfs and 38 + creating a squashfs from it. 39 + 40 + This does not mean it *has* to be done via Nix, as long as your images are what 41 + spindle expects, they should work. That is: 42 + - a guest agent is present inside of the image and when that image boots it will 43 + get started, 44 + - `spindle-workflow` user exists, 45 + - and the work directory is configured (`/workspace`). 46 + 47 + ## Image discovery 48 + 49 + Each built image ships with a `spec.json` next to its artifacts. This spec 50 + describes everything needed to run the image: the kernel, initrd and read-only 51 + store disk paths, boot args, memory/vCPU sizing, the shell used for workflow 52 + steps, writable volumes, network interfaces, and runner-specific config (machine 53 + type, CPU, extra args for QEMU). NixOS images also carry a `baseConfigHash` 54 + identifying the base configuration baked into the image. 55 + 56 + An image lives in the configured image directory either as a directory 57 + containing a `spec.json` (alongside the kernel/initrd/store-disk artifacts) or, 58 + for a self-contained spec, as a flat `<name>.json` file. An operator keeping 59 + multiple arches side by side can name them `<name>-<arch>` (eg. `nixos-x86_64`, 60 + `alpine-aarch64`); that arch suffix is just part of the name, not something 61 + resolution infers. 62 + 63 + A workflow names an image with the `image` key at top-level (falling back to 64 + `SPINDLE_MICROVM_PIPELINES_DEFAULT_IMAGE` if unset). The name is matched 65 + literally: we look for `<name>` (a directory with a `spec.json`) then 66 + `<name>.json`. Resolution depends only on the name and what is on disk, never on 67 + the host, so the same workflow resolves identically on every spindle. 
If for 68 + example an operator wants `nixos` to work, they can symlink `nixos` to 69 + `nixos-x86_64`. 70 + 71 + The spec is validated at resolve time (required fields, positive sizes etc.), 72 + and right before launch we also check the referenced files actually exist on 73 + disk and that the host has the commands we need: `mkfs.ext4` for volume 74 + formatting, plus whatever the selected runner requires. For QEMU that's the QEMU 75 + binary for the spec's arch, `/dev/vhost-vsock`, `/dev/kvm` (if KVM is enabled), 76 + and the `ip`, `mount`, `slirp4netns`, `unshare` toolchain when the image has 77 + network interfaces. 78 + 79 + ## microVM lifecycle 80 + 81 + ```mermaid 82 + flowchart LR 83 + Init["InitWorkflow<br/><small>parse manifest, resolve image, build steps</small>"] 84 + Acquire["AcquireWorkflowSlot<br/><small>queue until resources fit budget</small>"] 85 + Setup["SetupWorkflow<br/><small>proxies, VM, agent handshake</small>"] 86 + Run["RunStep ×N<br/><small>exec via agent</small>"] 87 + Destroy["DestroyWorkflow<br/><small>drain cache, poweroff, cleanup</small>"] 88 + 89 + Init --> Acquire --> Setup --> Run --> Destroy 90 + ``` 91 + 92 + While a workflow is running, things look like this (everything inside the cgroup 93 + box is what gets resource-limited): 94 + 95 + ```mermaid 96 + flowchart LR 97 + subgraph Host["spindle host"] 98 + Hub["agent hub"] 99 + ReadProxy["read cache proxy"] 100 + UploadProxy["upload cache proxy"] 101 + subgraph Cgroup["per-workflow cgroup"] 102 + QEMU["qemu"] 103 + Slirp["slirp4netns"] 104 + end 105 + end 106 + 107 + subgraph Guest["guest"] 108 + Agent["guest agent"] 109 + end 110 + 111 + Agent -->|"vsock"| Hub 112 + Agent -->|substitutions| ReadProxy 113 + Agent -->|built paths| UploadProxy 114 + QEMU --- Guest 115 + Slirp -->|outbound only| Internet["the internet"] 116 + ReadProxy --> Substituters["upstream caches"] 117 + UploadProxy --> NixCache["spindle nix cache"] 118 + ``` 119 + 120 + `InitWorkflow` parses the workflow 
manifest, resolves the image, and assembles 121 + the step list: the clone step first, then (for NixOS images with a workflow 122 + config) a "NixOS config activation" system step, then the user steps. Before any 123 + of this actually runs the workflow has to acquire a slot from the resource 124 + scheduler, each image declares its memory/vCPUs/disk and workflows queue until 125 + their request fits within the configured budget. The scheduler is 126 + work-conserving with aging and per-user fairness, so one user submitting a pile 127 + of jobs won't starve everyone else, and slots don't sit idle while there's 128 + queued work that fits in the budget. 129 + 130 + ### Configuration 131 + 132 + Setup allocates a random vsock CID for the guest and registers it with the agent 133 + hub, which listens on a single host vsock port. Incoming agent connections are 134 + matched to workflows by CID, anything with an unknown CID is dropped. It then 135 + creates a per-workflow work directory and starts three host-side proxies the guest 136 + reaches over vsock: a read cache proxy (fronting the configured Nix substituters 137 + plus any workflow-level `caches`) and an upload cache proxy (for pushing paths 138 + built in the guest to the spindle's cache), plus a DNS proxy that resolves 139 + through the host's resolver and filters private/special-purpose address answers. 140 + 141 + Then the VM itself. Writable volumes from the spec are created as sparse files 142 + and formatted ext4, the store disk is attached read-only. QEMU runs with 143 + `-sandbox on`, `-nodefaults`, no display/monitor, etc., serial output to a log 144 + file, and a QMP socket for control. 145 + 146 + For network hardening: if the image has network interfaces, QEMU doesn't run in 147 + the host network namespace at all. 
We `unshare` into fresh user/net/mount 148 + namespaces, and a small wrapper script inside the namespace bind-mounts a 149 + resolv.conf that disables qemu's slirp DNS, and adds blackhole routes for every 150 + special-use IPv4/IPv6 range (RFC 6890, so private networks, link-local, 151 + loopback, CGNAT, multicast, ULAs and so on) before exec'ing QEMU. `slirp4netns` 152 + (with `--disable-host-loopback`, sandbox and seccomp enabled) then provides 153 + outbound connectivity for the namespace. The guest's `/etc/resolv.conf` points 154 + at shuttle on localhost; shuttle forwards DNS packets over vsock to the 155 + host-side DNS proxy. The guest sits behind a second layer of QEMU user-mode 156 + networking inside that namespace, so guest traffic can only ever reach the 157 + outside world, never the host or anything on its local networks. 158 + 159 + Optionally the whole thing (QEMU and slirp4netns) is placed in a per-workflow 160 + cgroup with memory, swap and pids limits, so the budget above is actually 161 + enforced and not just bookkeeping. That also lets us, for example, detect when the 162 + cgroup OOM-kills the VM and report it as such instead of as a 163 + generic crash. The spindle supervisor itself also gets a cgroup with a 164 + protected `memory.min`, so under host memory pressure it's the workflows that 165 + get OOM-killed first, not spindle. 166 + 167 + ### Boot - run - death 168 + 169 + Once QEMU is up we poll the QMP socket until it accepts a connection and reports 170 + the guest as running, then wait for the guest agent to send a handshake message 171 + over vsock from the expected CID. The agent reports its protocol and versions, and 172 + spindle sends it the job id, trusted cache public keys, and the cache/DNS proxy 173 + ports. 
174 + 175 + First the activation step is run (if on a NixOS image and the workflow is 176 + configured with anything): spindle sends the user config (or a cached toplevel 177 + store path, if we've built this exact base + config combo before) and the agent 178 + builds and activates it before the user steps run. Afterwards, each step is sent 179 + as an exec request (`$shell -lc <command>` as an unprivileged workflow user in 180 + `/workspace/repo`, with workflow/step environment and unlocked secrets), and 181 + stdout/stderr stream back as messages until an exit message arrives. Timeouts 182 + are cooperative: we derive a deadline from the workflow timeout and ship it to 183 + the guest, with a little grace on the host side so the guest gets to report the 184 + timeout itself. While a step runs we also watch for the VM crashing; if it does, 185 + we tail the serial (and qemu) logs into the step's stderr so you get something 186 + more useful than "guest agent connection lost: EOF". 187 + 188 + Teardown is the same whether the workflow succeeded, failed or timed out: drain the 189 + guest's pending Nix cache uploads, ask the agent to power off and wait for QEMU 190 + to exit (falling back to QMP `system_powerdown` and finally a kill if it 191 + doesn't), then close the proxies and remove the work directory. 192 + 193 + ### Nix cache 194 + 195 + The two host-side cache proxies are how the guest talks to spindle's Nix cache without 196 + ever needing credentials or direct network access; like the agent they reach the 197 + host over vsock. 198 + 199 + The read proxy fronts the configured substituters plus any workflow-level 200 + `caches`. When the guest needs to realize a store path it asks the proxy, which 201 + queries the read caches concurrently and returns the first successful response, 202 + with a 404 only winning if every upstream returns 404. 
203 + 204 + The upload proxy goes the other way: paths built inside the guest are pushed to 205 + spindle's configured upload cache (if any) so the next workflow that needs them 206 + doesn't rebuild. Paths already present on any configured read cache are skipped. 207 + The agent queues built paths and they're uploaded eagerly as they appear; any 208 + still in flight at teardown block the drain step until they finish.
+368
spindle/engines/microvm/agent.go
··· 1 + package microvm 2 + 3 + import ( 4 + "context" 5 + "errors" 6 + "fmt" 7 + "io" 8 + "log/slog" 9 + "net" 10 + "sync" 11 + "time" 12 + 13 + "github.com/mdlayher/vsock" 14 + 15 + "tangled.org/core/spindle/agentproto" 16 + agentv1 "tangled.org/core/spindle/agentproto/gen" 17 + ) 18 + 19 + const guestWorkflowUser = "spindle-workflow" 20 + 21 + var errGuestTimedOut = errors.New("guest reported step timed out") 22 + 23 + type agentHub struct { 24 + l *slog.Logger 25 + ln *vsock.Listener 26 + pending map[uint32]chan net.Conn 27 + mu sync.Mutex 28 + } 29 + 30 + func newAgentHub(port uint32, l *slog.Logger) (*agentHub, error) { 31 + ln, err := vsock.Listen(port, nil) 32 + if err != nil { 33 + return nil, fmt.Errorf("listen for agent on vsock port %d: %w", port, err) 34 + } 35 + h := &agentHub{ 36 + l: l, 37 + ln: ln, 38 + pending: make(map[uint32]chan net.Conn), 39 + } 40 + go h.acceptLoop() 41 + return h, nil 42 + } 43 + 44 + func (h *agentHub) expect(cid uint32) (<-chan net.Conn, func(), error) { 45 + h.mu.Lock() 46 + defer h.mu.Unlock() 47 + if _, exists := h.pending[cid]; exists { 48 + return nil, nil, fmt.Errorf("already waiting for agent cid %d", cid) 49 + } 50 + ch := make(chan net.Conn, 1) 51 + h.pending[cid] = ch 52 + unregister := func() { 53 + h.mu.Lock() 54 + delete(h.pending, cid) 55 + h.mu.Unlock() 56 + close(ch) 57 + for conn := range ch { 58 + if conn != nil { 59 + _ = conn.Close() 60 + } 61 + } 62 + } 63 + return ch, unregister, nil 64 + } 65 + 66 + func (h *agentHub) acceptLoop() { 67 + for { 68 + conn, err := h.ln.Accept() 69 + if err != nil { 70 + h.l.Error("agent vsock accept failed", "error", err) 71 + return 72 + } 73 + 74 + addr, ok := conn.RemoteAddr().(*vsock.Addr) 75 + if !ok { 76 + h.l.Warn("agent connection has unexpected remote address", "remote", conn.RemoteAddr()) 77 + _ = conn.Close() 78 + continue 79 + } 80 + 81 + h.mu.Lock() 82 + ch, ok := h.pending[addr.ContextID] 83 + if ok { 84 + delete(h.pending, addr.ContextID) 85 + } 86 + 
h.mu.Unlock() 87 + 88 + // todo: if / when we add agent recovery (reconnect) we should add a 89 + // boot-initialized session credential to prevent random connections... 90 + // checking cid here works to ensure for now since we dont attempt to 91 + // reconnect, so we block anything else thats not expected (and agent 92 + // runs first in the boot sequence always). 93 + if !ok { 94 + h.l.Warn("dropping agent connection for unknown cid", "cid", addr.ContextID) 95 + _ = conn.Close() 96 + continue 97 + } 98 + 99 + select { 100 + case ch <- conn: 101 + default: 102 + _ = conn.Close() 103 + } 104 + } 105 + } 106 + 107 + type AgentExec struct { 108 + *agentv1.ExecStart 109 + ID string 110 + Stdout io.Writer 111 + Stderr io.Writer 112 + } 113 + 114 + type AgentSession struct { 115 + conn net.Conn 116 + enc *agentproto.Encoder 117 + dec *agentproto.Decoder 118 + l *slog.Logger 119 + mu sync.Mutex 120 + } 121 + 122 + func NewAgentSession(conn net.Conn, l *slog.Logger) *AgentSession { 123 + return &AgentSession{ 124 + conn: conn, 125 + enc: agentproto.NewEncoder(conn), 126 + dec: agentproto.NewDecoder(conn), 127 + l: l, 128 + } 129 + } 130 + 131 + func (s *AgentSession) Init(ctx context.Context, init *agentv1.Init) error { 132 + s.mu.Lock() 133 + defer s.mu.Unlock() 134 + 135 + hello, err := s.decode(ctx) 136 + if err != nil { 137 + return fmt.Errorf("read agent hello: %w", err) 138 + } 139 + helloPayload := hello.Hello 140 + if helloPayload == nil { 141 + return fmt.Errorf("expected agent hello, got nil") 142 + } 143 + s.l.Info("agent connected", "protocol", helloPayload.ProtocolVersion, "version", helloPayload.AgentVersion, "boot", helloPayload.BootId, "nix", helloPayload.NixVersion) 144 + 145 + if err := s.enc.Encode(&agentproto.Message{ 146 + Id: "init", 147 + Init: init, 148 + }); err != nil { 149 + return fmt.Errorf("send agent init: %w", err) 150 + } 151 + return nil 152 + } 153 + 154 + func (s *AgentSession) Exec(ctx context.Context, exec AgentExec) (int, error) { 
155 + s.mu.Lock() 156 + defer s.mu.Unlock() 157 + 158 + if exec.ID == "" { 159 + return 0, fmt.Errorf("empty ID passed to Exec") 160 + } 161 + 162 + if exec.ExecStart.TimeoutSeconds == 0 { 163 + exec.ExecStart.TimeoutSeconds = timeoutSeconds(ctx, guestTimeoutGrace) 164 + } 165 + 166 + if err := s.enc.Encode(&agentproto.Message{ 167 + Id: exec.ID, 168 + ExecStart: exec.ExecStart, 169 + }); err != nil { 170 + return 0, fmt.Errorf("send exec_start: %w", err) 171 + } 172 + 173 + for { 174 + msg, err := s.decode(ctx) 175 + if err != nil { 176 + return 0, err 177 + } 178 + if msg.BuiltPaths == nil && msg.Id != exec.ID { 179 + continue 180 + } 181 + 182 + if p := msg.ExecStdout; p != nil { 183 + _, _ = io.WriteString(exec.Stdout, p.Data) 184 + } else if p := msg.ExecStderr; p != nil { 185 + _, _ = io.WriteString(exec.Stderr, p.Data) 186 + } else if p := msg.BuiltPaths; p != nil { 187 + // s.l.Debug("guest built paths", "reason", p.Reason, "count", len(p.Paths)) 188 + } else if p := msg.ExecExit; p != nil { 189 + if p.Error != "" { 190 + s.l.Warn("guest exec error", "id", msg.Id, "error", p.Error) 191 + } 192 + if p.TimedOut { 193 + return int(p.ExitCode), errGuestTimedOut 194 + } 195 + return int(p.ExitCode), nil 196 + } 197 + } 198 + } 199 + 200 + func (s *AgentSession) ActivateConfig(ctx context.Context, id string, req *agentv1.ActivateConfig) (*agentv1.ActivateConfigResult, error) { 201 + s.mu.Lock() 202 + defer s.mu.Unlock() 203 + 204 + if id == "" { 205 + return nil, fmt.Errorf("empty ID passed to ActivateConfig") 206 + } 207 + if req.TimeoutSeconds == 0 { 208 + req.TimeoutSeconds = timeoutSeconds(ctx, guestTimeoutGrace) 209 + } 210 + if err := s.enc.Encode(&agentproto.Message{ 211 + Id: id, 212 + ActivateConfig: req, 213 + }); err != nil { 214 + return nil, fmt.Errorf("send activate_config: %w", err) 215 + } 216 + 217 + for { 218 + msg, err := s.decode(ctx) 219 + if err != nil { 220 + return nil, err 221 + } 222 + if msg.BuiltPaths == nil && msg.Id != id { 223 + 
continue 224 + } 225 + 226 + if p := msg.BuiltPaths; p != nil { 227 + // s.l.Debug("guest built paths", "reason", p.Reason, "count", len(p.Paths)) 228 + } else if p := msg.ActivateConfigResult; p != nil { 229 + if p.Error != "" { 230 + return nil, fmt.Errorf("activate config failed: %s", p.Error) 231 + } 232 + if p.Toplevel == "" { 233 + return nil, fmt.Errorf("activate config returned empty toplevel") 234 + } 235 + return p, nil 236 + } 237 + } 238 + } 239 + 240 + func (s *AgentSession) Poweroff(ctx context.Context) error { 241 + s.mu.Lock() 242 + defer s.mu.Unlock() 243 + 244 + id := "poweroff" 245 + if err := s.enc.Encode(&agentproto.Message{ 246 + Id: id, 247 + Poweroff: &agentv1.Poweroff{}, 248 + }); err != nil { 249 + return fmt.Errorf("send poweroff: %w", err) 250 + } 251 + 252 + for { 253 + msg, err := s.decode(ctx) 254 + if err != nil { 255 + return err 256 + } 257 + if msg.Id != id { 258 + continue 259 + } 260 + p := msg.PoweroffResult 261 + if p == nil { 262 + continue 263 + } 264 + if p.Error != "" { 265 + return fmt.Errorf("guest poweroff failed: %s", p.Error) 266 + } 267 + return nil 268 + } 269 + } 270 + 271 + func (s *AgentSession) Drain(ctx context.Context) (uint32, error) { 272 + s.mu.Lock() 273 + defer s.mu.Unlock() 274 + 275 + drainID := "cache-drain" 276 + if err := s.enc.Encode(&agentproto.Message{ 277 + Id: drainID, 278 + CacheDrain: &agentv1.CacheDrain{ 279 + TimeoutSeconds: timeoutSeconds(ctx, 0), 280 + }, 281 + }); err != nil { 282 + return 0, fmt.Errorf("send cache_drain: %w", err) 283 + } 284 + 285 + for { 286 + msg, err := s.decode(ctx) 287 + if err != nil { 288 + return 0, err 289 + } 290 + if msg.Id != drainID { 291 + continue 292 + } 293 + p := msg.CacheDrainResult 294 + if p == nil { 295 + continue 296 + } 297 + s.l.Info("cache drain complete", "uploaded", p.CacheUploaded, "failed", p.CacheFailed, "queued", p.CacheQueued, "active", p.CacheActive) 298 + if p.Error != "" { 299 + return 0, fmt.Errorf("cache drain failed: %s", p.Error) 
300 + } 301 + if p.CacheFailed > 0 { 302 + return 0, fmt.Errorf("cache drain failed for %d paths", p.CacheFailed) 303 + } 304 + if p.CacheQueued > 0 || p.CacheActive > 0 { 305 + return 0, fmt.Errorf("cache drain incomplete: queued=%d active=%d", p.CacheQueued, p.CacheActive) 306 + } 307 + return p.CacheUploaded, nil 308 + } 309 + } 310 + 311 + func (s *AgentSession) decode(ctx context.Context) (*agentproto.Message, error) { 312 + if err := ctx.Err(); err != nil { 313 + return nil, err 314 + } 315 + 316 + if deadline, ok := ctx.Deadline(); ok { 317 + _ = s.conn.SetReadDeadline(deadline) 318 + } else { 319 + _ = s.conn.SetReadDeadline(time.Time{}) 320 + } 321 + 322 + // a blocked vsock read wont wake up just from the ctx being cancelled, 323 + // only a deadline will wake it up, so if the VM crashes mid-step the read would 324 + // hang until workflow timeout. so we will set a deadline in the past to cancel it. 325 + // 326 + // we set a deadline here instead of closing the connection, this is the long-lived 327 + // connection that everything reuses, so we only really want to interrupt it for this 328 + // current read. 
this also lands as a timeout error which the netErr.Timeout() check 329 + // below maps to ctx.Err() correctly 330 + stop := context.AfterFunc(ctx, func() { 331 + _ = s.conn.SetReadDeadline(time.Now()) 332 + }) 333 + defer stop() 334 + 335 + msg, err := s.dec.Decode() 336 + if err != nil { 337 + var netErr net.Error 338 + if errors.As(err, &netErr) && netErr.Timeout() && ctx.Err() != nil { 339 + return nil, ctx.Err() 340 + } 341 + return nil, fmt.Errorf("read agent message: %w", err) 342 + } 343 + return msg, nil 344 + } 345 + 346 + func (s *AgentSession) Close() error { 347 + if s == nil || s.conn == nil { 348 + return nil 349 + } 350 + return s.conn.Close() 351 + } 352 + 353 + // this pulls the deadline from the context and converts it to what the 354 + // agentproto expects 355 + func timeoutSeconds(ctx context.Context, lead time.Duration) uint32 { 356 + deadline, ok := ctx.Deadline() 357 + if !ok { 358 + return 0 359 + } 360 + seconds := int64((time.Until(deadline) - lead).Round(time.Second) / time.Second) 361 + if seconds < 1 { 362 + return 1 363 + } 364 + if seconds > int64(^uint32(0)) { 365 + return ^uint32(0) 366 + } 367 + return uint32(seconds) 368 + }
+69
spindle/engines/microvm/args.go
··· 1 + package microvm 2 + 3 + import ( 4 + "fmt" 5 + "strings" 6 + ) 7 + 8 + type argBuilder struct { 9 + args []string 10 + } 11 + 12 + func newArgBuilder(capacity int) argBuilder { 13 + return argBuilder{ 14 + args: make([]string, 0, capacity), 15 + } 16 + } 17 + 18 + func (b *argBuilder) Add(args ...string) *argBuilder { 19 + b.args = append(b.args, args...) 20 + return b 21 + } 22 + 23 + func (b *argBuilder) Flag(name string) *argBuilder { 24 + b.args = append(b.args, name) 25 + return b 26 + } 27 + 28 + func (b *argBuilder) Opt(name, value string) *argBuilder { 29 + b.args = append(b.args, name, value) 30 + return b 31 + } 32 + 33 + func (b *argBuilder) Optf(name, format string, values ...any) *argBuilder { 34 + return b.Opt(name, fmt.Sprintf(format, values...)) 35 + } 36 + 37 + func (b *argBuilder) Args() []string { 38 + args := make([]string, len(b.args)) 39 + copy(args, b.args) 40 + return args 41 + } 42 + 43 + type optionBuilder struct { 44 + parts []string 45 + } 46 + 47 + func newOptionBuilder(capacity int) optionBuilder { 48 + return optionBuilder{ 49 + parts: make([]string, 0, capacity), 50 + } 51 + } 52 + 53 + func (b *optionBuilder) Add(parts ...string) *optionBuilder { 54 + b.parts = append(b.parts, parts...) 55 + return b 56 + } 57 + 58 + func (b *optionBuilder) KV(key, value string) *optionBuilder { 59 + b.parts = append(b.parts, key+"="+value) 60 + return b 61 + } 62 + 63 + func (b *optionBuilder) KVf(key, format string, values ...any) *optionBuilder { 64 + return b.KV(key, fmt.Sprintf(format, values...)) 65 + } 66 + 67 + func (b optionBuilder) String() string { 68 + return strings.Join(b.parts, ",") 69 + }
+38
spindle/engines/microvm/bench-boot.sh
#!/usr/bin/env bash
# quick boot-time benchmark for the spindle nixos microvm.
# boots N times running a trivial command, reports wall-clock + systemd-analyze.
# usage: bench-boot.sh [N]   (N = number of timed runs, default 5)
# needs: sudo modprobe vhost_vsock
# needs: bash >= 5.0 (EPOCHREALTIME)
set -euo pipefail

N="${1:-5}"

# N is used as a divisor for the mean, so reject 0 and anything non-numeric
# up front instead of failing after all the builds.
if ! [[ "$N" =~ ^[1-9][0-9]*$ ]]; then
  echo "usage: $0 [N]  (N must be a positive integer, got '$N')" >&2
  exit 2
fi

# without this check, older bash dies later with an unhelpful
# "unbound variable" under set -u.
if [[ -z "${EPOCHREALTIME:-}" ]]; then
  echo "this script needs bash >= 5.0 (EPOCHREALTIME is not available)" >&2
  exit 1
fi

cd "$(git rev-parse --show-toplevel)"

strip_ansi() { sed -E "s/$(printf '\033')\[[0-9;]*[a-zA-Z]//g; s/$(printf '\033')\([a-zA-Z]//g"; }

echo ">>> building runner + image"
nix develop --command go build -o spindle/spindle-microvm-run ./cmd/spindle-microvm-run
TARBALL=$(nix build .#spindle-nixos-image-tarball --no-link --print-out-paths)

WORK=$(mktemp -d -t spindle-bench-XXXXXX)
# the extracted image may contain read-only paths, make them writable so
# rm -rf can actually remove them.
trap 'chmod -R +w "$WORK" 2>/dev/null || true; rm -rf "$WORK"' EXIT
mkdir -p "$WORK/image"
tar -C "$WORK/image" -xzf "$TARBALL"
SPEC="$WORK/image/spec.json"

echo ">>> systemd-analyze breakdown"
spindle/spindle-microvm-run --image-spec "$SPEC" --work-dir "$WORK/analyze" --exec-timeout 60s -- \
  /run/current-system/sw/bin/systemd-analyze time 2>/dev/null | strip_ansi | grep -i startup || true

echo ">>> $N timed boot+exec(true) runs"
total=0
for i in $(seq 1 "$N"); do
  start=$EPOCHREALTIME
  spindle/spindle-microvm-run --image-spec "$SPEC" --work-dir "$WORK/run$i" --exec-timeout 60s -- \
    /run/current-system/sw/bin/true >/dev/null 2>&1
  end=$EPOCHREALTIME
  # EPOCHREALTIME is "<seconds><radix><microseconds>", where the radix
  # character follows the locale ("." or ","). split on either one, and work
  # in whole microseconds so a negative fraction difference is not truncated
  # on its own. 10# keeps the zero-padded fraction from being read as octal.
  us=$(( (${end%[.,]*} - ${start%[.,]*}) * 1000000 + 10#${end#*[.,]} - 10#${start#*[.,]} ))
  ms=$(( us / 1000 ))
  echo "  run $i: ${ms}ms"
  total=$((total + ms))
  rm -rf "$WORK/run$i"
done
echo ">>> mean wall-clock: $((total / N))ms over $N runs"
+91
spindle/engines/microvm/budget.go
··· 1 + package microvm 2 + 3 + import ( 4 + "context" 5 + "fmt" 6 + "time" 7 + 8 + "tangled.org/core/spindle/config" 9 + "tangled.org/core/spindle/engine" 10 + "tangled.org/core/spindle/models" 11 + ) 12 + 13 + type Resources struct { 14 + MemoryMiB int64 15 + VCPUs int64 16 + DiskMiB int64 17 + } 18 + 19 + func (r Resources) Fits(limit Resources) bool { 20 + if limit.MemoryMiB > 0 && r.MemoryMiB > limit.MemoryMiB { 21 + return false 22 + } 23 + if limit.VCPUs > 0 && r.VCPUs > limit.VCPUs { 24 + return false 25 + } 26 + if limit.DiskMiB > 0 && r.DiskMiB > limit.DiskMiB { 27 + return false 28 + } 29 + return true 30 + } 31 + 32 + func (r Resources) Add(other Resources) Resources { 33 + return Resources{ 34 + MemoryMiB: r.MemoryMiB + other.MemoryMiB, 35 + VCPUs: r.VCPUs + other.VCPUs, 36 + DiskMiB: r.DiskMiB + other.DiskMiB, 37 + } 38 + } 39 + 40 + func (r Resources) Sub(other Resources) Resources { 41 + return Resources{ 42 + MemoryMiB: max(0, r.MemoryMiB-other.MemoryMiB), 43 + VCPUs: max(0, r.VCPUs-other.VCPUs), 44 + DiskMiB: max(0, r.DiskMiB-other.DiskMiB), 45 + } 46 + } 47 + 48 + func (r Resources) String() string { 49 + return fmt.Sprintf("memory=%dMiB vcpus=%d disk=%dMiB", r.MemoryMiB, r.VCPUs, r.DiskMiB) 50 + } 51 + 52 + func newVMBudgetConfig(cfg config.MicroVMPipelines) (Resources, Resources, time.Duration) { 53 + budget := Resources{ 54 + MemoryMiB: cfg.MaxTotalMemoryMiB, 55 + VCPUs: cfg.MaxTotalVCPUs, 56 + DiskMiB: cfg.MaxTotalDiskMiB, 57 + } 58 + maxReq := Resources{ 59 + MemoryMiB: cfg.MaxWorkflowMemoryMiB, 60 + VCPUs: cfg.MaxWorkflowVCPUs, 61 + DiskMiB: cfg.MaxWorkflowDiskMiB, 62 + } 63 + return budget, maxReq, cfg.AgingThreshold 64 + } 65 + 66 + func (e *Engine) AcquireWorkflowSlot(ctx context.Context, wid models.WorkflowId, wf *models.Workflow) (engine.WorkflowSlot, error) { 67 + state, ok := wf.Data.(*workflowState) 68 + if !ok || state == nil { 69 + return nil, fmt.Errorf("microVM workflow state is not initialized") 70 + } 71 + if e.scheduler == 
nil { 72 + return engine.NoopSlot{}, nil 73 + } 74 + req := resourcesForImage(state.ImageSpec) 75 + if req.MemoryMiB < 0 || req.VCPUs < 0 || req.DiskMiB < 0 { 76 + return nil, fmt.Errorf("microVM resource request must not be negative: %s", req) 77 + } 78 + return e.scheduler.Acquire(ctx, req) 79 + } 80 + 81 + func resourcesForImage(spec ImageSpec) Resources { 82 + var diskMiB int64 83 + for _, volume := range spec.Volumes { 84 + diskMiB += volume.SizeMiB 85 + } 86 + return Resources{ 87 + MemoryMiB: int64(spec.MemoryMiB), 88 + VCPUs: int64(spec.VCPUs), 89 + DiskMiB: diskMiB, 90 + } 91 + }
+267
spindle/engines/microvm/cgroup.go
··· 1 + package microvm 2 + 3 + import ( 4 + "fmt" 5 + "log/slog" 6 + "os" 7 + "path/filepath" 8 + "regexp" 9 + "strings" 10 + 11 + cgroups "github.com/containerd/cgroups/v3" 12 + "github.com/containerd/cgroups/v3/cgroup2" 13 + "github.com/prometheus/procfs" 14 + ) 15 + 16 + var ( 17 + cgroupInvalidChar = regexp.MustCompile(`[^a-zA-Z0-9\-_.]`) 18 + cgroupConsecutiveSep = regexp.MustCompile(`[-_.]{2,}`) 19 + ) 20 + 21 + const ( 22 + cgroupParentSelf = "self" 23 + supervisorCgroupName = "supervisor" 24 + ) 25 + 26 + type CgroupLimits struct { 27 + Enabled bool 28 + Parent *CgroupParent 29 + Name string 30 + MemoryMaxMiB int64 31 + SwapMaxMiB *int64 32 + PidsMax int64 33 + } 34 + 35 + type CgroupParent struct { 36 + root *cgroup2.Manager 37 + mountpoint string 38 + group string 39 + } 40 + 41 + type CgroupHandle struct { 42 + manager *cgroup2.Manager 43 + } 44 + 45 + func initCgroupParent(parent string, supervisorMemoryMinMiB int64, logger *slog.Logger) (*CgroupParent, error) { 46 + if parent == "" { 47 + parent = cgroupParentSelf 48 + } 49 + if cgroups.Mode() != cgroups.Unified { 50 + return nil, fmt.Errorf("microVM cgroups require cgroup v2 unified mode") 51 + } 52 + 53 + mountpoint, group, err := resolveCgroupParent(parent) 54 + if err != nil { 55 + return nil, err 56 + } 57 + if _, err := os.Stat(filepath.Join(mountpoint, strings.TrimPrefix(group, "/"))); err != nil { 58 + return nil, fmt.Errorf("stat cgroup parent %q:%q: %w", mountpoint, group, err) 59 + } 60 + 61 + root, err := cgroup2.Load(group, cgroup2.WithMountpoint(mountpoint)) 62 + if err != nil { 63 + return nil, fmt.Errorf("load cgroup parent %q:%q: %w", mountpoint, group, err) 64 + } 65 + 66 + if group != "/" { 67 + if err := moveParentProcesses(root, supervisorMemoryMinMiB, logger); err != nil { 68 + return nil, err 69 + } 70 + } 71 + 72 + if logger != nil { 73 + logger.Info("initialized microVM cgroup parent", "mountpoint", mountpoint, "group", group) 74 + } 75 + return &CgroupParent{root: root, 
mountpoint: mountpoint, group: group}, nil 76 + } 77 + 78 + func prepareCgroup(limits CgroupLimits, logger *slog.Logger) (*CgroupHandle, error) { 79 + if !limits.Enabled { 80 + return nil, nil 81 + } 82 + if limits.Parent == nil || limits.Parent.root == nil { 83 + return nil, fmt.Errorf("cgroup parent is not initialized") 84 + } 85 + name := sanitizeCgroupName(limits.Name) 86 + if name == "" { 87 + return nil, fmt.Errorf("cgroup name is empty") 88 + } 89 + 90 + manager, err := limits.Parent.root.NewChild(name, cgroupResources(limits)) 91 + if err != nil { 92 + return nil, fmt.Errorf("create cgroup %q: %w", name, err) 93 + } 94 + 95 + if logger != nil { 96 + logger.Info("created microVM cgroup", "name", name, "parentGroup", limits.Parent.group) 97 + } 98 + return &CgroupHandle{manager: manager}, nil 99 + } 100 + 101 + func cgroupResources(limits CgroupLimits) *cgroup2.Resources { 102 + resources := &cgroup2.Resources{} 103 + if limits.MemoryMaxMiB > 0 || limits.SwapMaxMiB != nil { 104 + memory := &cgroup2.Memory{} 105 + if limits.MemoryMaxMiB > 0 { 106 + maxBytes := limits.MemoryMaxMiB * 1024 * 1024 107 + memory.Max = &maxBytes 108 + } 109 + if limits.SwapMaxMiB != nil { 110 + swapBytes := *limits.SwapMaxMiB * 1024 * 1024 111 + memory.Swap = &swapBytes 112 + } 113 + oomGroup := true 114 + memory.OOMGroup = &oomGroup 115 + resources.Memory = memory 116 + } 117 + if limits.PidsMax > 0 { 118 + resources.Pids = &cgroup2.Pids{Max: limits.PidsMax} 119 + } 120 + return resources 121 + } 122 + 123 + func supervisorResources(memoryMinMiB int64) *cgroup2.Resources { 124 + if memoryMinMiB <= 0 { 125 + return nil 126 + } 127 + minBytes := memoryMinMiB * 1024 * 1024 128 + return &cgroup2.Resources{ 129 + Memory: &cgroup2.Memory{Min: &minBytes}, 130 + } 131 + } 132 + 133 + func (h *CgroupHandle) AddProcess(pid int, logger *slog.Logger) error { 134 + if h == nil || h.manager == nil { 135 + return nil 136 + } 137 + if pid <= 0 { 138 + return fmt.Errorf("invalid pid %d", pid) 139 + 
} 140 + if err := h.manager.AddProc(uint64(pid)); err != nil { 141 + return fmt.Errorf("add pid %d to cgroup: %w", pid, err) 142 + } 143 + if logger != nil { 144 + logger.Info("added process to microVM cgroup", "pid", pid) 145 + } 146 + return nil 147 + } 148 + 149 + func (h *CgroupHandle) Close() error { 150 + if h == nil || h.manager == nil { 151 + return nil 152 + } 153 + return h.manager.Delete() 154 + } 155 + 156 + func (h *CgroupHandle) OOMKilled() bool { 157 + if h == nil || h.manager == nil { 158 + return false 159 + } 160 + metrics, err := h.manager.Stat() 161 + if err != nil || metrics == nil || metrics.MemoryEvents == nil { 162 + return false 163 + } 164 + return metrics.MemoryEvents.OomKill > 0 165 + } 166 + 167 + func resolveCgroupParent(parent string) (string, string, error) { 168 + mountpoint, err := cgroup2Mountpoint() 169 + if err != nil { 170 + return "", "", err 171 + } 172 + 173 + if parent == "" || parent == cgroupParentSelf { 174 + group, err := selfCgroupV2Path() 175 + if err != nil { 176 + return "", "", err 177 + } 178 + return mountpoint, group, nil 179 + } 180 + if !filepath.IsAbs(parent) { 181 + return "", "", fmt.Errorf("cgroup parent must be %q or an absolute delegated cgroupfs path: %q", cgroupParentSelf, parent) 182 + } 183 + 184 + cleanParent := filepath.Clean(parent) 185 + rel, err := filepath.Rel(mountpoint, cleanParent) 186 + if err != nil { 187 + return "", "", fmt.Errorf("resolve cgroup parent %q relative to cgroup2 mount %q: %w", cleanParent, mountpoint, err) 188 + } 189 + if rel == ".." || strings.HasPrefix(rel, "../") { 190 + return "", "", fmt.Errorf("cgroup parent %q is outside cgroup2 mount %q", cleanParent, mountpoint) 191 + } 192 + if rel == "." 
{ 193 + return mountpoint, "/", nil 194 + } 195 + 196 + group := "/" + filepath.ToSlash(rel) 197 + if err := cgroup2.VerifyGroupPath(group); err != nil { 198 + return "", "", fmt.Errorf("invalid cgroup parent path %q: %w", group, err) 199 + } 200 + return mountpoint, group, nil 201 + } 202 + 203 + func cgroup2Mountpoint() (string, error) { 204 + mounts, err := procfs.GetMounts() 205 + if err != nil { 206 + return "", fmt.Errorf("read procfs mountinfo: %w", err) 207 + } 208 + for _, mount := range mounts { 209 + if mount.FSType == "cgroup2" { 210 + return mount.MountPoint, nil 211 + } 212 + } 213 + return "", fmt.Errorf("cgroup v2 mountpoint not found") 214 + } 215 + 216 + func selfCgroupV2Path() (string, error) { 217 + self, err := procfs.Self() 218 + if err != nil { 219 + return "", fmt.Errorf("open procfs self: %w", err) 220 + } 221 + groups, err := self.Cgroups() 222 + if err != nil { 223 + return "", fmt.Errorf("read procfs self cgroups: %w", err) 224 + } 225 + for _, group := range groups { 226 + if group.HierarchyID != 0 { 227 + continue 228 + } 229 + path := group.Path 230 + if path == "" { 231 + path = "/" 232 + } 233 + if err := cgroup2.VerifyGroupPath(path); err != nil { 234 + return "", fmt.Errorf("invalid self cgroup path %q: %w", path, err) 235 + } 236 + return path, nil 237 + } 238 + return "", fmt.Errorf("current process has no cgroup v2 hierarchy entry") 239 + } 240 + 241 + func moveParentProcesses(parent *cgroup2.Manager, supervisorMemoryMinMiB int64, logger *slog.Logger) error { 242 + supervisor, err := parent.NewChild(supervisorCgroupName, supervisorResources(supervisorMemoryMinMiB)) 243 + if err != nil { 244 + return fmt.Errorf("create supervisor cgroup: %w", err) 245 + } 246 + 247 + procs, err := parent.Procs(false) 248 + if err != nil { 249 + return fmt.Errorf("list parent cgroup processes: %w", err) 250 + } 251 + for _, pid := range procs { 252 + if err := supervisor.AddProc(pid); err != nil { 253 + return fmt.Errorf("move pid %d to 
supervisor cgroup: %w", pid, err) 254 + } 255 + } 256 + 257 + if logger != nil && len(procs) > 0 { 258 + logger.Info("moved spindle processes to supervisor cgroup", "processes", len(procs)) 259 + } 260 + return nil 261 + } 262 + 263 + func sanitizeCgroupName(name string) string { 264 + name = cgroupInvalidChar.ReplaceAllLiteralString(name, "-") 265 + name = cgroupConsecutiveSep.ReplaceAllLiteralString(name, "-") 266 + return strings.Trim(name, "-_.") 267 + }
+126
spindle/engines/microvm/cgroup_oom_test.go
··· 1 + package microvm 2 + 3 + import ( 4 + "log/slog" 5 + "os" 6 + "os/exec" 7 + "runtime" 8 + "testing" 9 + "time" 10 + 11 + cgroups "github.com/containerd/cgroups/v3" 12 + ) 13 + 14 + const memhogEnv = "SPINDLE_CGROUP_MEMHOG" 15 + 16 + func TestMain(m *testing.M) { 17 + if os.Getenv(memhogEnv) == "1" { 18 + runMemhogChild() 19 + return 20 + } 21 + os.Exit(m.Run()) 22 + } 23 + 24 + // this will allocate memory in steps until either the cgroup kills the process 25 + // this is running on, or if the limit is reached. the limit is there so that if 26 + // the cgroup somehow does not work, we don't kill the host and can observe that 27 + // failure. 28 + func runMemhogChild() { 29 + var b [1]byte 30 + _, _ = os.Stdin.Read(b[:]) 31 + 32 + const chunk = 4 << 20 // 4 MiB 33 + const limit = 512 << 20 // safety cap 34 + hold := make([][]byte, 0, limit/chunk) 35 + for total := 0; total < limit; total += chunk { 36 + c := make([]byte, chunk) 37 + for i := range c { 38 + c[i] = 1 // fault the pages in so they count against memory.current 39 + } 40 + hold = append(hold, c) 41 + time.Sleep(5 * time.Millisecond) 42 + } 43 + runtime.KeepAlive(hold) 44 + os.Exit(0) 45 + } 46 + 47 + // creates a cgroup parent, adds a memory limited child to it, and creates a 48 + // process that hogs memory and observes if it OOMs or not. 
49 + // 50 + // run with: 51 + // 52 + // SPINDLE_CGROUP_INTEGRATION=1 systemd-run --user --scope -p Delegate=yes \ 53 + // go test -run TestCgroupOOMEnforcement ./spindle/engines/microvm/ 54 + func TestCgroupOOMEnforcement(t *testing.T) { 55 + if os.Getenv("SPINDLE_CGROUP_INTEGRATION") != "1" { 56 + t.Skip("see test doc comment on how to run") 57 + } 58 + if cgroups.Mode() != cgroups.Unified { 59 + t.Skip("requires cgroup v2 unified mode") 60 + } 61 + 62 + logger := slog.Default() 63 + 64 + parent, err := initCgroupParent(cgroupParentSelf, 0, logger) 65 + if err != nil { 66 + t.Skipf("cannot initialize cgroup parent (need cgroup v2 delegation): %v", err) 67 + } 68 + 69 + swap := int64(0) // disable swap so the limit forces an OOM promptly 70 + handle, err := prepareCgroup(CgroupLimits{ 71 + Enabled: true, 72 + Parent: parent, 73 + Name: "cgtest-oom", 74 + MemoryMaxMiB: 64, 75 + SwapMaxMiB: &swap, 76 + PidsMax: 256, 77 + }, logger) 78 + if err != nil { 79 + t.Skipf("cannot create a memory-limited child cgroup (need the memory controller delegated): %v", err) 80 + } 81 + if handle == nil { 82 + t.Fatal("prepareCgroup returned a nil handle for enabled limits") 83 + } 84 + t.Cleanup(func() { _ = handle.Close() }) 85 + 86 + cmd := exec.Command(os.Args[0]) 87 + cmd.Env = append(os.Environ(), memhogEnv+"=1") 88 + stdin, err := cmd.StdinPipe() 89 + if err != nil { 90 + t.Fatal(err) 91 + } 92 + if err := cmd.Start(); err != nil { 93 + t.Fatal(err) 94 + } 95 + defer func() { 96 + _ = cmd.Process.Kill() 97 + _ = cmd.Wait() 98 + }() 99 + 100 + if err := handle.AddProcess(cmd.Process.Pid, logger); err != nil { 101 + t.Fatalf("add memhog to cgroup: %v", err) 102 + } 103 + 104 + // let the child process start allocating memory 105 + if _, err := stdin.Write([]byte("g")); err != nil { 106 + t.Fatalf("release memhog: %v", err) 107 + } 108 + _ = stdin.Close() 109 + 110 + waitErr := make(chan error, 1) 111 + go func() { waitErr <- cmd.Wait() }() 112 + 113 + select { 114 + case err 
:= <-waitErr: 115 + if err == nil { 116 + t.Fatal("memhog exited cleanly: the cgroup memory limit was not enforced") 117 + } 118 + t.Logf("memhog died as expected: %v", err) 119 + case <-time.After(30 * time.Second): 120 + t.Fatal("memhog did not die within 30s, cgroup memory limit not enforced") 121 + } 122 + 123 + if !handle.OOMKilled() { 124 + t.Fatal("OOMKilled() is false after the memhog was killed, memory.events oom_kill was not observed") 125 + } 126 + }
+39
spindle/engines/microvm/cgroup_test.go
··· 1 + package microvm 2 + 3 + import ( 4 + "testing" 5 + ) 6 + 7 + func TestSanitizeCgroupName(t *testing.T) { 8 + cases := []struct { 9 + in string 10 + want string 11 + }{ 12 + {"workflow-abc123", "workflow-abc123"}, 13 + {"a/b:c", "a-b-c"}, 14 + {"--lead--", "lead"}, 15 + {"a__b..c", "a-b-c"}, 16 + {"keep.dots_and-dashes", "keep.dots_and-dashes"}, 17 + {"", ""}, 18 + {"///", ""}, 19 + } 20 + for _, tc := range cases { 21 + if got := sanitizeCgroupName(tc.in); got != tc.want { 22 + t.Errorf("sanitizeCgroupName(%q) = %q, want %q", tc.in, got, tc.want) 23 + } 24 + } 25 + } 26 + 27 + func TestCgroupResourcesSwapOnlyStillSetsMemory(t *testing.T) { 28 + swap := int64(8) 29 + r := cgroupResources(CgroupLimits{SwapMaxMiB: &swap}) 30 + if r.Memory == nil { 31 + t.Fatal("a swap limit alone should still produce a memory controller config") 32 + } 33 + if r.Memory.Max != nil { 34 + t.Errorf("memory max should be unset when only swap is limited, got %v", *r.Memory.Max) 35 + } 36 + if r.Memory.Swap == nil || *r.Memory.Swap != 8*1024*1024 { 37 + t.Errorf("swap = %v, want %d bytes", r.Memory.Swap, 8*1024*1024) 38 + } 39 + }
+380
spindle/engines/microvm/dns_proxy.go
··· 1 + package microvm 2 + 3 + import ( 4 + "context" 5 + "errors" 6 + "fmt" 7 + "log/slog" 8 + "net" 9 + "sync" 10 + "time" 11 + 12 + "github.com/miekg/dns" 13 + ) 14 + 15 + const ( 16 + dnsProxyIOTimeout = 10 * time.Second 17 + dnsProxyIdleTimeout = 30 * time.Second 18 + dnsProxyShutdownTimeout = 10 * time.Second 19 + dnsProxyMaxConnections = 64 20 + dnsProxyMaxTCPQueries = 128 21 + dnsProxyResolvConfPath = "/etc/resolv.conf" 22 + ) 23 + 24 + type DNSProxy struct { 25 + port uint32 26 + srv *dns.Server 27 + 28 + closeOnce sync.Once 29 + closeErr error 30 + } 31 + 32 + func StartDNSProxy(ctx context.Context, cid uint32, logger *slog.Logger) (*DNSProxy, error) { 33 + if ctx == nil { 34 + ctx = context.Background() 35 + } 36 + 37 + if logger == nil { 38 + logger = slog.Default() 39 + } 40 + logger = logger.With("where", "dns_proxy", "cid", cid) 41 + 42 + ln, port, err := listenRandomVsockPort(ctx) 43 + if err != nil { 44 + return nil, fmt.Errorf("listen for dns proxy: %w", err) 45 + } 46 + 47 + resolver, err := newHostDNSResolver(dnsProxyResolvConfPath, logger) 48 + if err != nil { 49 + _ = ln.Close() 50 + return nil, err 51 + } 52 + 53 + listener := newLimitedListener( 54 + &cidFilteredVsockListener{ 55 + Listener: ln, 56 + cid: cid, 57 + logger: logger, 58 + }, 59 + dnsProxyMaxConnections, 60 + logger, 61 + ) 62 + 63 + proxy := &DNSProxy{ 64 + port: port, 65 + srv: &dns.Server{ 66 + Net: "tcp", 67 + Listener: listener, 68 + Handler: dns.HandlerFunc(resolver.ServeDNS), 69 + ReadTimeout: dnsProxyIOTimeout, 70 + WriteTimeout: dnsProxyIOTimeout, 71 + IdleTimeout: func() time.Duration { return dnsProxyIdleTimeout }, 72 + MaxTCPQueries: dnsProxyMaxTCPQueries, 73 + MsgInvalidFunc: func(_ []byte, err error) { 74 + logger.Warn("dns proxy invalid message", "error", err) 75 + }, 76 + }, 77 + } 78 + 79 + go func() { 80 + <-ctx.Done() 81 + _ = proxy.Close() 82 + }() 83 + 84 + go func() { 85 + if err := proxy.srv.ActivateAndServe(); err != nil && !errors.Is(err, net.ErrClosed) 
{ 86 + logger.Warn("dns proxy stopped", "error", err) 87 + } 88 + }() 89 + 90 + logger.Info("started dns proxy", "port", port) 91 + return proxy, nil 92 + } 93 + 94 + func (p *DNSProxy) Port() uint32 { 95 + if p == nil { 96 + return 0 97 + } 98 + return p.port 99 + } 100 + 101 + func (p *DNSProxy) Close() error { 102 + if p == nil || p.srv == nil { 103 + return nil 104 + } 105 + 106 + p.closeOnce.Do(func() { 107 + shutdownCtx, cancel := context.WithTimeout(context.Background(), dnsProxyShutdownTimeout) 108 + defer cancel() 109 + 110 + p.closeErr = p.srv.ShutdownContext(shutdownCtx) 111 + }) 112 + return p.closeErr 113 + } 114 + 115 + type limitedListener struct { 116 + net.Listener 117 + slots chan struct{} 118 + logger *slog.Logger 119 + } 120 + 121 + func newLimitedListener(listener net.Listener, limit int, logger *slog.Logger) net.Listener { 122 + if limit <= 0 { 123 + return listener 124 + } 125 + return &limitedListener{ 126 + Listener: listener, 127 + slots: make(chan struct{}, limit), 128 + logger: logger, 129 + } 130 + } 131 + 132 + func (l *limitedListener) Accept() (net.Conn, error) { 133 + for { 134 + conn, err := l.Listener.Accept() 135 + if err != nil { 136 + return nil, err 137 + } 138 + 139 + select { 140 + case l.slots <- struct{}{}: 141 + return &limitedConn{ 142 + Conn: conn, 143 + release: func() { 144 + <-l.slots 145 + }, 146 + }, nil 147 + default: 148 + l.logger.Warn("dns proxy dropped connection because workers are busy") 149 + _ = conn.Close() 150 + } 151 + } 152 + } 153 + 154 + type limitedConn struct { 155 + net.Conn 156 + once sync.Once 157 + release func() 158 + } 159 + 160 + func (c *limitedConn) Close() error { 161 + err := c.Conn.Close() 162 + c.once.Do(c.release) 163 + return err 164 + } 165 + 166 + type hostDNSResolver struct { 167 + upstreams []string 168 + attempts int 169 + timeout time.Duration 170 + logger *slog.Logger 171 + } 172 + 173 + func newHostDNSResolver(path string, logger *slog.Logger) (*hostDNSResolver, error) { 174 
+ config, err := dns.ClientConfigFromFile(path) 175 + if err != nil { 176 + return nil, fmt.Errorf("read host resolv.conf: %w", err) 177 + } 178 + if len(config.Servers) == 0 { 179 + return nil, fmt.Errorf("host resolv.conf has no nameservers") 180 + } 181 + 182 + port := config.Port 183 + if port == "" { 184 + port = "53" 185 + } 186 + 187 + upstreams := make([]string, 0, len(config.Servers)) 188 + for _, server := range config.Servers { 189 + upstreams = append(upstreams, net.JoinHostPort(server, port)) 190 + } 191 + 192 + timeout := time.Duration(config.Timeout) * time.Second 193 + if timeout <= 0 { 194 + timeout = dnsProxyIOTimeout 195 + } 196 + 197 + return &hostDNSResolver{ 198 + upstreams: upstreams, 199 + attempts: max(config.Attempts, 1), 200 + timeout: timeout, 201 + logger: logger, 202 + }, nil 203 + } 204 + 205 + func (r *hostDNSResolver) ServeDNS(w dns.ResponseWriter, req *dns.Msg) { 206 + resp, err := r.exchange(req) 207 + if err != nil { 208 + r.logger.Warn( 209 + "dns upstream exchange failed", 210 + "question", dnsQuestionLogValue(req), 211 + "error", err, 212 + ) 213 + if err := w.WriteMsg(rcodeResponse(req, dns.RcodeServerFailure)); err != nil { 214 + r.logger.Warn("dns proxy response write failed", "error", err) 215 + } 216 + return 217 + } 218 + 219 + filterDNSResponse(resp) 220 + 221 + if err := w.WriteMsg(resp); err != nil { 222 + r.logger.Warn("dns proxy response write failed", "error", err) 223 + } 224 + } 225 + 226 + func (r *hostDNSResolver) exchange(req *dns.Msg) (*dns.Msg, error) { 227 + var errs []error 228 + 229 + for range r.attempts { 230 + for _, upstream := range r.upstreams { 231 + resp, err := exchangeDNSAt(req, upstream, r.timeout) 232 + if err == nil { 233 + return resp, nil 234 + } 235 + errs = append(errs, fmt.Errorf("%s: %w", upstream, err)) 236 + } 237 + } 238 + 239 + return nil, errors.Join(errs...) 
240 + } 241 + 242 + func exchangeDNSAt(req *dns.Msg, addr string, timeout time.Duration) (*dns.Msg, error) { 243 + resp, _, err := (&dns.Client{Net: "udp", Timeout: timeout}).Exchange(req, addr) 244 + if err != nil { 245 + return nil, err 246 + } 247 + if resp == nil { 248 + return nil, fmt.Errorf("empty udp response") 249 + } 250 + if !resp.Truncated { 251 + return resp, nil 252 + } 253 + 254 + resp, _, err = (&dns.Client{Net: "tcp", Timeout: timeout}).Exchange(req, addr) 255 + if err != nil { 256 + return nil, err 257 + } 258 + if resp == nil { 259 + return nil, fmt.Errorf("empty tcp response") 260 + } 261 + return resp, nil 262 + } 263 + 264 + func filterDNSResponse(msg *dns.Msg) { 265 + if msg == nil { 266 + return 267 + } 268 + msg.Answer = filterDNSRRs(msg.Answer) 269 + msg.Ns = filterDNSRRs(msg.Ns) 270 + msg.Extra = filterDNSRRs(msg.Extra) 271 + } 272 + 273 + func filterDNSRRs(rrs []dns.RR) []dns.RR { 274 + filtered := rrs[:0] 275 + for _, rr := range rrs { 276 + if rr := filterDNSRR(rr); rr != nil { 277 + filtered = append(filtered, rr) 278 + } 279 + } 280 + return filtered 281 + } 282 + 283 + func filterDNSRR(rr dns.RR) dns.RR { 284 + switch rr := rr.(type) { 285 + case *dns.A: 286 + if isBlockedNamespaceIP(rr.A) { 287 + return nil 288 + } 289 + case *dns.AAAA: 290 + if isBlockedNamespaceIP(rr.AAAA) { 291 + return nil 292 + } 293 + case *dns.SVCB: 294 + filterSVCBValues(&rr.Value) 295 + case *dns.HTTPS: 296 + filterSVCBValues(&rr.Value) 297 + } 298 + return rr 299 + } 300 + 301 + // this removes any blocked namespaces in ipv4/v6 hints 302 + func filterSVCBValues(values *[]dns.SVCBKeyValue) { 303 + filtered := (*values)[:0] 304 + for _, value := range *values { 305 + switch value := value.(type) { 306 + case *dns.SVCBIPv4Hint: 307 + value.Hint = filterDNSIPs(value.Hint) 308 + if len(value.Hint) == 0 { 309 + continue 310 + } 311 + case *dns.SVCBIPv6Hint: 312 + value.Hint = filterDNSIPs(value.Hint) 313 + if len(value.Hint) == 0 { 314 + continue 315 + } 316 + 
} 317 + filtered = append(filtered, value) 318 + } 319 + *values = filtered 320 + } 321 + 322 + func filterDNSIPs(ips []net.IP) []net.IP { 323 + filtered := ips[:0] 324 + for _, ip := range ips { 325 + if !isBlockedNamespaceIP(ip) { 326 + filtered = append(filtered, ip) 327 + } 328 + } 329 + return filtered 330 + } 331 + 332 + func isBlockedNamespaceIP(ip net.IP) bool { 333 + if ip == nil { 334 + return true 335 + } 336 + if ip4 := ip.To4(); ip4 != nil { 337 + return isBlockedByNamespaceNets(ip4, 32) 338 + } 339 + return isBlockedByNamespaceNets(ip, 128) 340 + } 341 + 342 + func isBlockedByNamespaceNets(ip net.IP, bits int) bool { 343 + for _, blockedNet := range blockedNamespaceNets { 344 + if blockedNet == nil { 345 + continue 346 + } 347 + 348 + _, blockedBits := blockedNet.Mask.Size() 349 + if blockedBits != bits { 350 + continue 351 + } 352 + if blockedNet.Contains(ip) { 353 + return true 354 + } 355 + } 356 + return false 357 + } 358 + 359 + func rcodeResponse(req *dns.Msg, rcode int) *dns.Msg { 360 + resp := new(dns.Msg) 361 + if req == nil { 362 + resp.Rcode = rcode 363 + return resp 364 + } 365 + resp.SetRcode(req, rcode) 366 + return resp 367 + } 368 + 369 + func dnsQuestionLogValue(msg *dns.Msg) string { 370 + if msg == nil || len(msg.Question) == 0 { 371 + return "" 372 + } 373 + 374 + q := msg.Question[0] 375 + qtype := dns.TypeToString[q.Qtype] 376 + if qtype == "" { 377 + qtype = fmt.Sprintf("TYPE%d", q.Qtype) 378 + } 379 + return fmt.Sprintf("%s/%s", q.Name, qtype) 380 + }
+73
spindle/engines/microvm/dns_proxy_test.go
··· 1 + package microvm 2 + 3 + import ( 4 + "net" 5 + "testing" 6 + 7 + "github.com/miekg/dns" 8 + ) 9 + 10 + func TestFilterDNSResponseDropsBlockedAddressRecords(t *testing.T) { 11 + msg := new(dns.Msg) 12 + msg.Answer = []dns.RR{ 13 + &dns.CNAME{Hdr: dns.RR_Header{Name: "cache.example.", Rrtype: dns.TypeCNAME, Class: dns.ClassINET}, Target: "edge.example."}, 14 + &dns.A{Hdr: dns.RR_Header{Name: "edge.example.", Rrtype: dns.TypeA, Class: dns.ClassINET}, A: net.ParseIP("1.1.1.1")}, 15 + &dns.A{Hdr: dns.RR_Header{Name: "edge.example.", Rrtype: dns.TypeA, Class: dns.ClassINET}, A: net.ParseIP("10.0.0.1")}, 16 + &dns.AAAA{Hdr: dns.RR_Header{Name: "edge.example.", Rrtype: dns.TypeAAAA, Class: dns.ClassINET}, AAAA: net.ParseIP("2606:4700:4700::1111")}, 17 + &dns.AAAA{Hdr: dns.RR_Header{Name: "edge.example.", Rrtype: dns.TypeAAAA, Class: dns.ClassINET}, AAAA: net.ParseIP("fd7a:115c:a1e0::53")}, 18 + } 19 + msg.Extra = []dns.RR{ 20 + &dns.A{Hdr: dns.RR_Header{Name: "private.example.", Rrtype: dns.TypeA, Class: dns.ClassINET}, A: net.ParseIP("192.168.1.2")}, 21 + &dns.A{Hdr: dns.RR_Header{Name: "public.example.", Rrtype: dns.TypeA, Class: dns.ClassINET}, A: net.ParseIP("8.8.8.8")}, 22 + } 23 + 24 + filterDNSResponse(msg) 25 + 26 + if len(msg.Answer) != 3 { 27 + t.Fatalf("filtered answer len = %d, want 3: %#v", len(msg.Answer), msg.Answer) 28 + } 29 + if _, ok := msg.Answer[0].(*dns.CNAME); !ok { 30 + t.Fatalf("answer[0] = %T, want CNAME", msg.Answer[0]) 31 + } 32 + if a, ok := msg.Answer[1].(*dns.A); !ok || !a.A.Equal(net.ParseIP("1.1.1.1")) { 33 + t.Fatalf("answer[1] = %#v, want public A", msg.Answer[1]) 34 + } 35 + if aaaa, ok := msg.Answer[2].(*dns.AAAA); !ok || !aaaa.AAAA.Equal(net.ParseIP("2606:4700:4700::1111")) { 36 + t.Fatalf("answer[2] = %#v, want public AAAA", msg.Answer[2]) 37 + } 38 + if len(msg.Extra) != 1 { 39 + t.Fatalf("filtered extra len = %d, want 1: %#v", len(msg.Extra), msg.Extra) 40 + } 41 + } 42 + 43 + func 
TestFilterDNSResponseFiltersSVCBAddressHints(t *testing.T) { 44 + msg := new(dns.Msg) 45 + msg.Answer = []dns.RR{ 46 + &dns.HTTPS{ 47 + SVCB: dns.SVCB{ 48 + Hdr: dns.RR_Header{Name: "svc.example.", Rrtype: dns.TypeHTTPS, Class: dns.ClassINET}, 49 + Priority: 1, 50 + Target: ".", 51 + Value: []dns.SVCBKeyValue{ 52 + &dns.SVCBIPv4Hint{Hint: []net.IP{net.ParseIP("10.0.0.1"), net.ParseIP("8.8.8.8")}}, 53 + &dns.SVCBIPv6Hint{Hint: []net.IP{net.ParseIP("fd7a:115c:a1e0::53"), net.ParseIP("2001:4860:4860::8888")}}, 54 + }, 55 + }, 56 + }, 57 + } 58 + 59 + filterDNSResponse(msg) 60 + 61 + https := msg.Answer[0].(*dns.HTTPS) 62 + if len(https.Value) != 2 { 63 + t.Fatalf("https values len = %d, want 2", len(https.Value)) 64 + } 65 + ipv4 := https.Value[0].(*dns.SVCBIPv4Hint) 66 + if len(ipv4.Hint) != 1 || !ipv4.Hint[0].Equal(net.ParseIP("8.8.8.8")) { 67 + t.Fatalf("ipv4 hints = %v, want [8.8.8.8]", ipv4.Hint) 68 + } 69 + ipv6 := https.Value[1].(*dns.SVCBIPv6Hint) 70 + if len(ipv6.Hint) != 1 || !ipv6.Hint[0].Equal(net.ParseIP("2001:4860:4860::8888")) { 71 + t.Fatalf("ipv6 hints = %v, want [2001:4860:4860::8888]", ipv6.Hint) 72 + } 73 + }
+517
spindle/engines/microvm/engine.go
··· 1 + package microvm 2 + 3 + import ( 4 + "context" 5 + "encoding/json" 6 + "errors" 7 + "fmt" 8 + "io" 9 + "log/slog" 10 + "os" 11 + "slices" 12 + "sync" 13 + "sync/atomic" 14 + "time" 15 + 16 + "gopkg.in/yaml.v3" 17 + 18 + "tangled.org/core/api/tangled" 19 + "tangled.org/core/log" 20 + "tangled.org/core/spindle/agentproto" 21 + agentv1 "tangled.org/core/spindle/agentproto/gen" 22 + "tangled.org/core/spindle/config" 23 + "tangled.org/core/spindle/db" 24 + "tangled.org/core/spindle/engine" 25 + "tangled.org/core/spindle/models" 26 + "tangled.org/core/spindle/secrets" 27 + ) 28 + 29 + const ( 30 + guestWorkDir = "/workspace/repo" 31 + activationStepAction = "activate-config" 32 + agentAcceptTimeout = 2 * time.Minute 33 + agentHandshakeTimeout = 30 * time.Second 34 + cacheDrainTimeout = 5 * time.Minute 35 + vmShutdownTimeout = 10 * time.Second 36 + guestTimeoutGrace = 5 * time.Second 37 + ) 38 + 39 + type cleanupFunc func(context.Context) error 40 + 41 + type Engine struct { 42 + l *slog.Logger 43 + cfg *config.Config 44 + db *db.DB 45 + agent *agentHub 46 + scheduler *engine.ResourceScheduler[Resources] 47 + cgroupParent *CgroupParent 48 + 49 + cleanupMu sync.Mutex 50 + cleanup map[string][]cleanupFunc 51 + } 52 + 53 + type Step struct { 54 + name string 55 + kind models.StepKind 56 + command string 57 + environment map[string]string 58 + action string 59 + config manifestConfig 60 + configKey string 61 + } 62 + 63 + func (s Step) Name() string { return s.name } 64 + func (s Step) Command() string { return s.command } 65 + func (s Step) Kind() models.StepKind { return s.kind } 66 + 67 + func New(ctx context.Context, cfg *config.Config, d *db.DB) (*Engine, error) { 68 + l := log.FromContext(ctx).With("component", "engine.microvm") 69 + port := cfg.MicroVMPipelines.AgentPort 70 + if port == 0 { 71 + port = agentproto.DefaultPort 72 + } 73 + agent, err := newAgentHub(port, l) 74 + if err != nil { 75 + return nil, err 76 + } 77 + budget, max, agingThreshold := 
newVMBudgetConfig(cfg.MicroVMPipelines) 78 + l.Info("initialized microVM workflow budget", "budget", budget.String(), "maxWorkflow", max.String(), "agingThreshold", agingThreshold) 79 + 80 + var cgroupParent *CgroupParent 81 + if cfg.MicroVMPipelines.EnableCgroups { 82 + cgroupParent, err = initCgroupParent(cfg.MicroVMPipelines.CgroupParent, cfg.MicroVMPipelines.CgroupSupervisorMemoryMinMiB, l) 83 + if err != nil { 84 + return nil, err 85 + } 86 + } 87 + 88 + return &Engine{ 89 + l: l, 90 + cfg: cfg, 91 + db: d, 92 + agent: agent, 93 + scheduler: engine.NewResourceScheduler[Resources](budget, max, agingThreshold), 94 + cgroupParent: cgroupParent, 95 + cleanup: make(map[string][]cleanupFunc), 96 + }, nil 97 + } 98 + 99 + func (e *Engine) InitWorkflow(twf tangled.Pipeline_Workflow, tpl tangled.Pipeline) (*models.Workflow, error) { 100 + swf := &models.Workflow{} 101 + var dwf manifestWorkflow 102 + 103 + if err := yaml.Unmarshal([]byte(twf.Raw), &dwf); err != nil { 104 + return nil, err 105 + } 106 + 107 + for _, dstep := range dwf.Steps { 108 + swf.Steps = append(swf.Steps, Step{ 109 + name: dstep.Name, 110 + kind: models.StepKindUser, 111 + command: dstep.Command, 112 + environment: dstep.Environment, 113 + }) 114 + } 115 + swf.Name = twf.Name 116 + swf.Environment = dwf.Environment 117 + 118 + if tpl.TriggerMetadata != nil { 119 + if clone := models.BuildCloneStep(twf, *tpl.TriggerMetadata, e.cfg.Server.Dev); clone.Command() != "" { 120 + swf.Steps = append([]models.Step{clone}, swf.Steps...) 
121 + } 122 + } 123 + 124 + imageSpec, imageSpecPath, imageName, err := e.resolveImage(dwf.Image) 125 + if err != nil { 126 + return nil, err 127 + } 128 + configKey := "" 129 + config := manifestConfig{ 130 + Services: dwf.Services, 131 + Virtualisation: dwf.Virtualisation, 132 + Dependencies: dwf.Dependencies, 133 + Registry: dwf.Registry, 134 + } 135 + if config.Enabled() { 136 + if !imageSpec.SupportsConfigActivation() { 137 + return nil, fmt.Errorf( 138 + "microVM image %q is not a NixOS image: services, virtualisation, dependencies and registry workflow options require a NixOS image", 139 + imageName, 140 + ) 141 + } 142 + var err error 143 + configKey, err = buildConfigKey(imageSpec, config) 144 + if err != nil { 145 + return nil, fmt.Errorf("build config key: %w", err) 146 + } 147 + activationStep := Step{ 148 + name: "NixOS config activation", 149 + kind: models.StepKindSystem, 150 + command: "activate nixos config", 151 + action: activationStepAction, 152 + config: config, 153 + configKey: configKey, 154 + } 155 + 156 + insertAt := 0 157 + if len(swf.Steps) > 0 && swf.Steps[0].Kind() == models.StepKindSystem { 158 + insertAt = 1 159 + } 160 + swf.Steps = append(swf.Steps, nil) 161 + copy(swf.Steps[insertAt+1:], swf.Steps[insertAt:]) 162 + swf.Steps[insertAt] = activationStep 163 + } 164 + 165 + cacheURLs, cacheKeys, err := workflowCaches(dwf.Caches) 166 + if err != nil { 167 + return nil, err 168 + } 169 + 170 + swf.Data = &workflowState{ 171 + ImageSpec: imageSpec, 172 + ImageSpecPath: imageSpecPath, 173 + Config: config, 174 + ConfigKey: configKey, 175 + Image: imageName, 176 + CacheReadURLs: cacheURLs, 177 + CacheTrustedPublicKeys: cacheKeys, 178 + NixOSToplevelCache: newNixOSToplevelCacheStore(e.db), 179 + } 180 + return swf, nil 181 + } 182 + 183 + func (e *Engine) SetupWorkflow(ctx context.Context, wid models.WorkflowId, wf *models.Workflow, wfLogger models.WorkflowLogger) error { 184 + l := e.l.With("workflow", wid) 185 + setupStep := Step{name: 
"microVM setup", kind: models.StepKindSystem} 186 + 187 + wfLogger.ControlWriter(-1, setupStep, models.StepStatusStart).Write([]byte{0}) 188 + defer wfLogger.ControlWriter(-1, setupStep, models.StepStatusEnd).Write([]byte{0}) 189 + 190 + state, ok := wf.Data.(*workflowState) 191 + if !ok || state == nil { 192 + return fmt.Errorf("workflow state is not initialized") 193 + } 194 + 195 + cid, err := AllocateCID() 196 + if err != nil { 197 + return err 198 + } 199 + connCh, unregister, err := e.agent.expect(cid) 200 + if err != nil { 201 + return err 202 + } 203 + defer unregister() 204 + 205 + workDirBase := e.cfg.MicroVMPipelines.OverlayDir 206 + if workDirBase == "" { 207 + workDirBase = os.TempDir() 208 + } 209 + workDir, err := os.MkdirTemp(workDirBase, "spindle-microvm-"+wid.String()+"-*") 210 + if err != nil { 211 + return fmt.Errorf("create workflow microVM directory: %w", err) 212 + } 213 + state.WorkDir = workDir 214 + 215 + setupDone := false 216 + defer func() { 217 + if setupDone { 218 + return 219 + } 220 + if err := e.cleanupState(context.Background(), wid, state); err != nil { 221 + l.Error("failed to cleanup failed setup", "error", err) 222 + } 223 + }() 224 + 225 + upstreams, err := BuildCacheUpstreams(e.cfg.NixCache.ReadURLs, state.CacheReadURLs) 226 + if err != nil { 227 + return err 228 + } 229 + readCache, err := StartReadCacheProxy(ctx, cid, upstreams, l) 230 + if err != nil { 231 + return err 232 + } 233 + state.ReadCache = readCache 234 + uploadCache, err := StartUploadCacheProxy(ctx, cid, e.cfg.NixCache.UploadURL, upstreams, l) 235 + if err != nil { 236 + return err 237 + } 238 + state.UploadCache = uploadCache 239 + dnsProxy, err := StartDNSProxy(ctx, cid, l) 240 + if err != nil { 241 + return err 242 + } 243 + state.DNSProxy = dnsProxy 244 + 245 + port := e.cfg.MicroVMPipelines.AgentPort 246 + if port == 0 { 247 + port = agentproto.DefaultPort 248 + } 249 + state.ImageSpec.BootArgs = fmt.Sprintf("%s shuttle.vsock_port=%d", 
state.ImageSpec.BootArgs, port) 250 + 251 + fmt.Fprintf(wfLogger.DataWriter(-1, "stdout"), "starting microVM image %s\n", state.Image) 252 + l.Info("starting microVM workflow", "image", state.Image, "imageSpec", state.ImageSpecPath, "cid", cid, "workDir", workDir) 253 + 254 + var vm VMHandle 255 + vm, err = StartVM(ctx, VMConfig{ 256 + Image: state.ImageSpec, 257 + CID: cid, 258 + EnableKVM: e.cfg.MicroVMPipelines.EnableKVM, 259 + WorkDir: workDir, 260 + Cgroup: e.cgroupLimits(wid, state.ImageSpec), 261 + Dev: e.cfg.Server.Dev, 262 + }, l) 263 + if err != nil { 264 + return err 265 + } 266 + state.VM = vm 267 + 268 + acceptCtx, cancelAccept := context.WithTimeout(ctx, agentAcceptTimeout) 269 + defer cancelAccept() 270 + conn, err := waitAgentConn(acceptCtx, connCh) 271 + if err != nil { 272 + return err 273 + } 274 + 275 + agentSession := NewAgentSession(conn, l) 276 + initCtx, cancelInit := context.WithTimeout(ctx, agentHandshakeTimeout) 277 + defer cancelInit() 278 + if err := agentSession.Init(initCtx, &agentv1.Init{ 279 + JobId: wid.String(), 280 + CacheTrustedPublicKeys: append(slices.Clone(e.cfg.NixCache.TrustedPublicKeys), state.CacheTrustedPublicKeys...), 281 + CacheReadProxyPort: readCache.Port(), 282 + CacheUploadProxyPort: uploadCache.Port(), 283 + DnsProxyPort: dnsProxy.Port(), 284 + }); err != nil { 285 + _ = agentSession.Close() 286 + return err 287 + } 288 + state.Agent = agentSession 289 + wf.Data = state 290 + 291 + e.registerCleanup(wid, func(ctx context.Context) error { 292 + return e.cleanupState(ctx, wid, state) 293 + }) 294 + setupDone = true 295 + 296 + fmt.Fprintf(wfLogger.DataWriter(-1, "stdout"), 297 + "agent connected; serial log: %s\n", vm.Logs().Serial, 298 + ) 299 + return nil 300 + } 301 + 302 + func (e *Engine) RunStep(ctx context.Context, wid models.WorkflowId, w *models.Workflow, idx int, secrets []secrets.UnlockedSecret, wfLogger models.WorkflowLogger) error { 303 + state, ok := w.Data.(*workflowState) 304 + if !ok || state == nil 
|| state.Agent == nil { 305 + return fmt.Errorf("microVM workflow is not connected to agent") 306 + } 307 + 308 + stderr := wfLogger.DataWriter(idx, "stderr") 309 + 310 + execCtx, vmExited, cancelWatch := watchVMExit(ctx, state.VM) 311 + defer cancelWatch() 312 + 313 + step := w.Steps[idx] 314 + if s, ok := step.(Step); ok && s.action == activationStepAction { 315 + err := e.activateConfig(execCtx, wid, state, s, wfLogger.DataWriter(idx, "stdout")) 316 + return e.classifyStepError(ctx, wid, step, state, stderr, vmExited, err) 317 + } 318 + env := []string{ 319 + "HOME=/workspace", 320 + "LOGNAME=" + guestWorkflowUser, 321 + "PATH=/run/current-system/sw/bin:/nix/var/nix/profiles/default/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 322 + "USER=" + guestWorkflowUser, 323 + } 324 + for k, v := range w.Environment { 325 + env = append(env, k+"="+v) 326 + } 327 + for _, s := range secrets { 328 + env = append(env, s.Key+"="+s.Value) 329 + } 330 + if s, ok := step.(Step); ok { 331 + for k, v := range s.environment { 332 + env = append(env, k+"="+v) 333 + } 334 + } 335 + 336 + stdout := wfLogger.DataWriter(idx, "stdout") 337 + exitCode, err := state.Agent.Exec(execCtx, AgentExec{ 338 + ID: fmt.Sprintf("%s-%d", wid.String(), idx), 339 + ExecStart: &agentv1.ExecStart{ 340 + Argv: []string{state.ImageSpec.Shell, "-lc", step.Command()}, 341 + Env: env, 342 + Cwd: guestWorkDir, 343 + User: guestWorkflowUser, 344 + // timeout not set here, Exec will fill it 345 + }, 346 + Stdout: stdout, 347 + Stderr: stderr, 348 + }) 349 + if err != nil { 350 + return e.classifyStepError(ctx, wid, step, state, stderr, vmExited, err) 351 + } 352 + 353 + if exitCode != 0 { 354 + e.l.Debug("step exited non-zero", "workflow", wid, "step", step.Name(), "exitCode", exitCode) 355 + return engine.ErrWorkflowFailed 356 + } 357 + return nil 358 + } 359 + 360 + // reads the vm serial logs so we report the tail of that as an error instead of 361 + // just "guest agent connection lost: 
EOF" 362 + func (e *Engine) classifyStepError(ctx context.Context, wid models.WorkflowId, step models.Step, state *workflowState, stderr io.Writer, vmExited *atomic.Bool, err error) error { 363 + if err == nil { 364 + return nil 365 + } 366 + l := e.l.With("workflow", wid, "step", step.Name()) 367 + 368 + if vmExited != nil && vmExited.Load() { 369 + reason := "microVM exited unexpectedly" 370 + oom := state.VM != nil && state.VM.OOMKilled() 371 + if oom { 372 + reason = "microVM killed by OOM (cgroup memory limit exceeded)" 373 + } 374 + if detail := vmCrashLog(state.VM); detail != "" { 375 + fmt.Fprintf(stderr, "%s:\n%s\n", reason, detail) 376 + l.Debug(reason, "oom", oom, "detail", detail) 377 + } else { 378 + fmt.Fprintln(stderr, reason) 379 + l.Debug(reason, "oom", oom) 380 + } 381 + return errors.New(reason + "; see workflow logs for serial output") 382 + } 383 + 384 + if errors.Is(err, errGuestTimedOut) || ctx.Err() != nil { 385 + l.Debug("step timed out", "guestReported", errors.Is(err, errGuestTimedOut)) 386 + return engine.ErrTimedOut 387 + } 388 + 389 + // the agent connection dropped while qemu stayed up (eg. the guest kernel 390 + // OOM-killed the agent or a guest panic), so surface serial logs, those 391 + // will be more helpful. 
392 + if detail := vmCrashLog(state.VM); detail != "" { 393 + fmt.Fprintf(stderr, "step failed (%v):\n%s\n", err, detail) 394 + l.Debug("step failed", "error", err, "detail", detail) 395 + } else { 396 + l.Debug("step failed", "error", err) 397 + } 398 + return err 399 + } 400 + 401 + func (e *Engine) activateConfig(ctx context.Context, wid models.WorkflowId, state *workflowState, step Step, out io.Writer) error { 402 + cfg := step.config 403 + if !cfg.Enabled() { 404 + return nil 405 + } 406 + 407 + configKey := step.configKey 408 + if configKey == "" { 409 + configKey = state.ConfigKey 410 + } 411 + 412 + userConfigJSON, err := json.Marshal(cfg) 413 + if err != nil { 414 + return fmt.Errorf("encode user config: %w", err) 415 + } 416 + 417 + var cachedToplevel string 418 + if configKey != "" { 419 + if record, ok, err := state.NixOSToplevelCache.Lookup(configKey); err != nil { 420 + return err 421 + } else if ok { 422 + cachedToplevel = record.Toplevel 423 + fmt.Fprintf(out, "realizing cached NixOS config %s\n", cachedToplevel) 424 + } 425 + } 426 + if cachedToplevel == "" { 427 + fmt.Fprintf(out, "building NixOS config from user config\n") 428 + } 429 + 430 + baseHash, err := BaseConfigHash(state.ImageSpec) 431 + if err != nil { 432 + return fmt.Errorf("calculate base config hash: %w", err) 433 + } 434 + 435 + result, err := state.Agent.ActivateConfig(ctx, fmt.Sprintf("%s-config", wid.String()), &agentv1.ActivateConfig{ 436 + ConfigKey: configKey, 437 + BaseConfigHash: baseHash, 438 + UserConfig: string(userConfigJSON), 439 + Toplevel: cachedToplevel, 440 + }) 441 + if err != nil { 442 + return err 443 + } 444 + fmt.Fprintf(out, "activated NixOS config toplevel %s\n", result.Toplevel) 445 + 446 + if cachedToplevel != "" || configKey == "" { 447 + return nil 448 + } 449 + if e.cfg.NixCache.UploadURL == "" { 450 + e.l.Warn("not committing config cache metadata: no upload URL configured", "workflow", wid, "configKey", configKey, "toplevel", result.Toplevel) 451 + 
return nil 452 + } 453 + 454 + drainCtx, cancel := context.WithTimeout(ctx, cacheDrainTimeout) 455 + defer cancel() 456 + if _, err := state.Agent.Drain(drainCtx); err != nil { 457 + return fmt.Errorf("drain config cache uploads before metadata commit: %w", err) 458 + } 459 + if err := state.NixOSToplevelCache.Commit(configKey, result.Toplevel); err != nil { 460 + return err 461 + } 462 + fmt.Fprintf(out, "committed config cache metadata %s -> %s\n", configKey, result.Toplevel) 463 + return nil 464 + } 465 + 466 + func (e *Engine) DestroyWorkflow(ctx context.Context, wid models.WorkflowId) error { 467 + fns := e.drainCleanups(wid) 468 + 469 + var cleanupErr error 470 + for i := len(fns) - 1; i >= 0; i-- { 471 + if err := fns[i](ctx); err != nil { 472 + e.l.Error("failed to cleanup workflow resource", "workflowId", wid, "error", err) 473 + cleanupErr = errors.Join(cleanupErr, err) 474 + } 475 + } 476 + return cleanupErr 477 + } 478 + 479 + func (e *Engine) FinalizeWorkflow(ctx context.Context, wid models.WorkflowId, w *models.Workflow, wfLogger models.WorkflowLogger) error { 480 + return nil 481 + } 482 + 483 + func (e *Engine) WorkflowTimeout() time.Duration { 484 + d, err := time.ParseDuration(e.cfg.MicroVMPipelines.WorkflowTimeout) 485 + if err != nil { 486 + d = 5 * time.Minute 487 + } 488 + return d + guestTimeoutGrace 489 + } 490 + 491 + func (e *Engine) registerCleanup(wid models.WorkflowId, fn cleanupFunc) { 492 + e.cleanupMu.Lock() 493 + defer e.cleanupMu.Unlock() 494 + key := wid.String() 495 + e.cleanup[key] = append(e.cleanup[key], fn) 496 + } 497 + 498 + func (e *Engine) drainCleanups(wid models.WorkflowId) []cleanupFunc { 499 + e.cleanupMu.Lock() 500 + defer e.cleanupMu.Unlock() 501 + key := wid.String() 502 + fns := e.cleanup[key] 503 + delete(e.cleanup, key) 504 + return fns 505 + } 506 + 507 + func (e *Engine) cgroupLimits(wid models.WorkflowId, spec ImageSpec) CgroupLimits { 508 + cfg := e.cfg.MicroVMPipelines 509 + return CgroupLimits{ 510 + 
Enabled: cfg.EnableCgroups, 511 + Parent: e.cgroupParent, 512 + Name: "workflow-" + wid.String(), 513 + MemoryMaxMiB: resourcesForImage(spec).MemoryMiB, 514 + SwapMaxMiB: cfg.CgroupSwapMaxMiB, 515 + PidsMax: cfg.CgroupPidsMax, 516 + } 517 + }
+116
spindle/engines/microvm/engine_test.go
··· 1 + package microvm 2 + 3 + import ( 4 + "encoding/json" 5 + "log/slog" 6 + "os" 7 + "path/filepath" 8 + "strings" 9 + "testing" 10 + 11 + "tangled.org/core/api/tangled" 12 + "tangled.org/core/spindle/config" 13 + ) 14 + 15 + func writeTestImageSpec(t *testing.T, dir, name string, spec ImageSpec) { 16 + t.Helper() 17 + data, err := json.Marshal(spec) 18 + if err != nil { 19 + t.Fatal(err) 20 + } 21 + if err := os.WriteFile(filepath.Join(dir, name+".json"), data, 0o644); err != nil { 22 + t.Fatal(err) 23 + } 24 + } 25 + 26 + func testEngine(t *testing.T, imageDir string) *Engine { 27 + t.Helper() 28 + return &Engine{ 29 + l: slog.Default(), 30 + cfg: &config.Config{ 31 + MicroVMPipelines: config.MicroVMPipelines{ 32 + ImageDir: imageDir, 33 + DefaultImage: "alpine", 34 + }, 35 + }, 36 + } 37 + } 38 + 39 + func TestInitWorkflowRejectsConfigOnNonNixOSImage(t *testing.T) { 40 + dir := t.TempDir() 41 + writeTestImageSpec(t, dir, "alpine", validImageSpec()) 42 + 43 + e := testEngine(t, dir) 44 + _, err := e.InitWorkflow(tangled.Pipeline_Workflow{ 45 + Raw: ` 46 + image: alpine 47 + dependencies: 48 + - nixpkgs#hello 49 + steps: 50 + - name: hello 51 + command: hello 52 + `, 53 + }, tangled.Pipeline{}) 54 + if err == nil { 55 + t.Fatal("expected error for NixOS config options on a non-NixOS image") 56 + } 57 + if !strings.Contains(err.Error(), "NixOS") { 58 + t.Fatalf("error should mention NixOS images, got: %v", err) 59 + } 60 + } 61 + 62 + func TestInitWorkflowPlainStepsOnNonNixOSImage(t *testing.T) { 63 + dir := t.TempDir() 64 + writeTestImageSpec(t, dir, "alpine", validImageSpec()) 65 + 66 + e := testEngine(t, dir) 67 + wf, err := e.InitWorkflow(tangled.Pipeline_Workflow{ 68 + Raw: ` 69 + image: alpine 70 + steps: 71 + - name: hello 72 + command: echo hello 73 + `, 74 + }, tangled.Pipeline{}) 75 + if err != nil { 76 + t.Fatal(err) 77 + } 78 + if len(wf.Steps) != 1 { 79 + t.Fatalf("expected exactly the user step, got %d steps", len(wf.Steps)) 80 + } 81 + state, ok 
:= wf.Data.(*workflowState) 82 + if !ok { 83 + t.Fatal("workflow data is not workflowState") 84 + } 85 + if state.ConfigKey != "" { 86 + t.Fatalf("non-NixOS workflow should not have a config key, got %q", state.ConfigKey) 87 + } 88 + } 89 + 90 + func TestInitWorkflowConfigOnNixOSImage(t *testing.T) { 91 + dir := t.TempDir() 92 + spec := validImageSpec() 93 + spec.BaseConfigHash = "abcdef123456" 94 + writeTestImageSpec(t, dir, "nixos", spec) 95 + 96 + e := testEngine(t, dir) 97 + wf, err := e.InitWorkflow(tangled.Pipeline_Workflow{ 98 + Raw: ` 99 + image: nixos 100 + dependencies: 101 + - nixpkgs#hello 102 + steps: 103 + - name: hello 104 + command: hello 105 + `, 106 + }, tangled.Pipeline{}) 107 + if err != nil { 108 + t.Fatal(err) 109 + } 110 + if len(wf.Steps) != 2 { 111 + t.Fatalf("expected activation step + user step, got %d steps", len(wf.Steps)) 112 + } 113 + if step, ok := wf.Steps[0].(Step); !ok || step.action != activationStepAction { 114 + t.Fatalf("first step should be the activation step, got %+v", wf.Steps[0]) 115 + } 116 + }
+254
spindle/engines/microvm/image.go
··· 1 + package microvm 2 + 3 + import ( 4 + "encoding/json" 5 + "errors" 6 + "fmt" 7 + "os" 8 + "path/filepath" 9 + "strings" 10 + ) 11 + 12 + const imageSpecFileName = "spec.json" 13 + 14 + type RunnerConfig struct { 15 + CPU string `json:"cpu,omitempty"` 16 + Machine string `json:"machine,omitempty"` 17 + Console string `json:"console,omitempty"` 18 + ExtraArgs []string `json:"extraArgs,omitempty"` 19 + } 20 + 21 + type ImageSpec struct { 22 + Arch string `json:"arch"` 23 + BootArgs string `json:"bootArgs"` 24 + Initrd string `json:"initrd"` 25 + Kernel string `json:"kernel"` 26 + RunnerType string `json:"runnerType"` 27 + RunnerConfig RunnerConfig `json:"runnerConfig"` 28 + MemoryMiB int `json:"memoryMiB"` 29 + NetworkInterfaces []NetworkInterface `json:"networkInterfaces"` 30 + StoreDisk string `json:"storeDisk"` 31 + StoreDiskType string `json:"storeDiskType"` 32 + // baseConfigHash identifies the base nixos configuration baked into the 33 + // image. its only for nixos images as other images won't have a system 34 + // to rebuild. 35 + BaseConfigHash string `json:"baseConfigHash,omitempty"` 36 + // shell is the login shell used to run workflow step commands in the guest. 
37 + Shell string `json:"shell"` 38 + VCPUs int `json:"vcpus"` 39 + Volumes []Volume `json:"volumes"` 40 + } 41 + 42 + func (s ImageSpec) SupportsConfigActivation() bool { 43 + return s.BaseConfigHash != "" 44 + } 45 + 46 + type NetworkInterface struct { 47 + Type string `json:"type"` 48 + ID string `json:"id"` 49 + MAC string `json:"mac"` 50 + } 51 + 52 + type Volume struct { 53 + FSType string `json:"fsType"` 54 + Image string `json:"image"` 55 + ImageType string `json:"imageType"` 56 + MountPoint string `json:"mountPoint"` 57 + ReadOnly bool `json:"readOnly"` 58 + SizeMiB int64 `json:"sizeMiB"` 59 + } 60 + 61 + func LoadImageSpec(path string) (ImageSpec, error) { 62 + data, err := os.ReadFile(path) 63 + if err != nil { 64 + return ImageSpec{}, fmt.Errorf("read microvm image spec: %w", err) 65 + } 66 + 67 + var spec ImageSpec 68 + if err := json.Unmarshal(data, &spec); err != nil { 69 + return ImageSpec{}, fmt.Errorf("parse microvm image spec: %w", err) 70 + } 71 + 72 + base := filepath.Dir(path) 73 + spec.Kernel = resolveImageSpecPath(base, spec.Kernel) 74 + spec.Initrd = resolveImageSpecPath(base, spec.Initrd) 75 + spec.StoreDisk = resolveImageSpecPath(base, spec.StoreDisk) 76 + 77 + if err := spec.Validate(); err != nil { 78 + return ImageSpec{}, err 79 + } 80 + return spec, nil 81 + } 82 + 83 + func (s ImageSpec) Validate() error { 84 + if s.Kernel == "" { 85 + return fmt.Errorf("microvm image spec missing kernel") 86 + } 87 + if s.Initrd == "" { 88 + return fmt.Errorf("microvm image spec missing initrd") 89 + } 90 + if s.StoreDisk == "" { 91 + return fmt.Errorf("microvm image spec missing storeDisk") 92 + } 93 + if s.BootArgs == "" { 94 + return fmt.Errorf("microvm image spec missing bootArgs") 95 + } 96 + if s.Shell == "" { 97 + return fmt.Errorf("microvm image spec missing shell") 98 + } 99 + if s.RunnerType == "qemu" || s.RunnerType == "" { 100 + if s.RunnerConfig.Machine == "" { 101 + return fmt.Errorf("microvm image spec missing runnerConfig.machine for 
qemu runner") 102 + } 103 + } 104 + if s.MemoryMiB <= 0 { 105 + return fmt.Errorf("microvm image spec memoryMiB must be positive") 106 + } 107 + if s.VCPUs <= 0 { 108 + return fmt.Errorf("microvm image spec vcpus must be positive") 109 + } 110 + for _, networkInterface := range s.NetworkInterfaces { 111 + if networkInterface.Type == "" { 112 + return fmt.Errorf("microvm image spec network interface missing type") 113 + } 114 + if networkInterface.ID == "" { 115 + return fmt.Errorf("microvm image spec network interface missing id") 116 + } 117 + if networkInterface.MAC == "" { 118 + return fmt.Errorf("microvm image spec network interface %q missing mac", networkInterface.ID) 119 + } 120 + } 121 + for _, volume := range s.Volumes { 122 + if volume.Image == "" { 123 + return fmt.Errorf("microvm image spec volume missing image") 124 + } 125 + if volume.FSType == "" { 126 + return fmt.Errorf("microvm image spec volume %q missing fsType", volume.Image) 127 + } 128 + if volume.SizeMiB <= 0 { 129 + return fmt.Errorf("microvm image spec volume %q sizeMiB must be positive", volume.Image) 130 + } 131 + } 132 + return nil 133 + } 134 + 135 + func (s ImageSpec) RunnerCmd() string { 136 + switch s.RunnerType { 137 + case "qemu", "": 138 + return "qemu-system-" + s.Arch 139 + case "firecracker": 140 + return "firecracker" 141 + default: 142 + return "" 143 + } 144 + } 145 + 146 + // also see Runner.Validate for where Runner specific files are validated 147 + func (s ImageSpec) validateImageFiles() error { 148 + required := map[string]string{ 149 + "kernel": s.Kernel, 150 + "initrd": s.Initrd, 151 + "storeDisk": s.StoreDisk, 152 + } 153 + for name, path := range required { 154 + if !filepath.IsAbs(path) { 155 + continue 156 + } 157 + if _, err := os.Stat(path); err != nil { 158 + return fmt.Errorf("required image spec file %s not found at %q: %w", name, path, err) 159 + } 160 + } 161 + 162 + return nil 163 + } 164 + 165 + func resolveImageSpecPath(base, path string) string { 166 + 
if path == "" || filepath.IsAbs(path) { 167 + return path 168 + } 169 + return filepath.Join(base, path) 170 + } 171 + 172 + func (e *Engine) resolveImage(name string) (ImageSpec, string, string, error) { 173 + name = strings.TrimSpace(name) 174 + if name == "" { 175 + name = strings.TrimSpace(e.cfg.MicroVMPipelines.DefaultImage) 176 + } 177 + if name == "" { 178 + return ImageSpec{}, "", "", fmt.Errorf("no image specified in workflow and SPINDLE_MICROVM_PIPELINES_DEFAULT_IMAGE is not set") 179 + } 180 + if !isPlainImageName(name) { 181 + return ImageSpec{}, "", "", fmt.Errorf("invalid microVM image name %q: must be a plain name, not a path", name) 182 + } 183 + 184 + candidates := imageCandidates(e.cfg.MicroVMPipelines.ImageDir, name) 185 + for _, candidate := range candidates { 186 + path, ok, err := imageSpecPath(candidate) 187 + if err != nil { 188 + return ImageSpec{}, "", "", err 189 + } 190 + if !ok { 191 + continue 192 + } 193 + imageSpec, err := LoadImageSpec(path) 194 + if err != nil { 195 + return ImageSpec{}, "", "", err 196 + } 197 + return imageSpec, path, name, nil 198 + } 199 + 200 + return ImageSpec{}, "", "", fmt.Errorf("microVM image %q was not found; looked in: %s", name, strings.Join(candidates, ", ")) 201 + } 202 + 203 + // check if image name is not a path 204 + func isPlainImageName(name string) bool { 205 + if name == "" || name == "." || name == ".." 
{ 206 + return false 207 + } 208 + if filepath.IsAbs(name) || strings.ContainsRune(name, '/') || strings.ContainsRune(name, filepath.Separator) { 209 + return false 210 + } 211 + return true 212 + } 213 + 214 + // returns candidates, which is either a directory or spec file itself 215 + func imageCandidates(imageDir, name string) []string { 216 + if imageDir == "" { 217 + return nil 218 + } 219 + return []string{ 220 + filepath.Join(imageDir, name), 221 + filepath.Join(imageDir, name+".json"), 222 + } 223 + } 224 + 225 + // resolve the candidate to a spec: 226 + // - first check if its a file, if yes, return 227 + // - otherwise assume its a directory and check and return `/spec.json` 228 + func imageSpecPath(candidate string) (string, bool, error) { 229 + info, err := os.Stat(candidate) 230 + if err != nil { 231 + if errors.Is(err, os.ErrNotExist) { 232 + return "", false, nil 233 + } 234 + return "", false, err 235 + } 236 + if !info.IsDir() { 237 + return candidate, true, nil 238 + } 239 + 240 + spec := filepath.Join(candidate, imageSpecFileName) 241 + info, err = os.Stat(spec) 242 + if err != nil { 243 + if errors.Is(err, os.ErrNotExist) { 244 + return "", false, fmt.Errorf("microVM image directory %q does not contain %s", candidate, imageSpecFileName) 245 + } 246 + return "", false, err 247 + } 248 + // this only happens if there is a directory named `spec.json` which would be very silly. 249 + // but better output an error for it anyway :p 250 + if info.IsDir() { 251 + return "", false, fmt.Errorf("microVM image spec %q is a directory", spec) 252 + } 253 + return spec, true, nil 254 + }
+127
spindle/engines/microvm/image_test.go
··· 1 + package microvm 2 + 3 + import ( 4 + "encoding/json" 5 + "os" 6 + "path/filepath" 7 + "strings" 8 + "testing" 9 + ) 10 + 11 + func writeSpecFile(t *testing.T, path string) { 12 + t.Helper() 13 + data, err := json.Marshal(validImageSpec()) 14 + if err != nil { 15 + t.Fatal(err) 16 + } 17 + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { 18 + t.Fatal(err) 19 + } 20 + if err := os.WriteFile(path, data, 0o644); err != nil { 21 + t.Fatal(err) 22 + } 23 + } 24 + 25 + func TestResolveImageConventionalLayouts(t *testing.T) { 26 + cases := []struct { 27 + name string 28 + layout func(t *testing.T, dir string) 29 + }{ 30 + { 31 + name: "directory with spec.json", 32 + layout: func(t *testing.T, dir string) { 33 + writeSpecFile(t, filepath.Join(dir, "nixos", "spec.json")) 34 + }, 35 + }, 36 + { 37 + name: "flat <name>.json", 38 + layout: func(t *testing.T, dir string) { 39 + writeSpecFile(t, filepath.Join(dir, "nixos.json")) 40 + }, 41 + }, 42 + } 43 + 44 + for _, tc := range cases { 45 + t.Run(tc.name, func(t *testing.T) { 46 + dir := t.TempDir() 47 + tc.layout(t, dir) 48 + 49 + e := testEngine(t, dir) 50 + spec, path, name, err := e.resolveImage("nixos") 51 + if err != nil { 52 + t.Fatalf("resolveImage: %v", err) 53 + } 54 + if name != "nixos" { 55 + t.Fatalf("name = %q, want nixos", name) 56 + } 57 + if !strings.HasPrefix(path, dir) { 58 + t.Fatalf("resolved path %q not under image dir %q", path, dir) 59 + } 60 + if spec.Shell == "" { 61 + t.Fatal("resolved spec not loaded") 62 + } 63 + }) 64 + } 65 + } 66 + 67 + func TestResolveImageDirectoryMissingSpec(t *testing.T) { 68 + dir := t.TempDir() 69 + if err := os.MkdirAll(filepath.Join(dir, "nixos"), 0o755); err != nil { 70 + t.Fatal(err) 71 + } 72 + 73 + e := testEngine(t, dir) 74 + _, _, _, err := e.resolveImage("nixos") 75 + if err == nil || !strings.Contains(err.Error(), imageSpecFileName) { 76 + t.Fatalf("directory without %s should error, got: %v", imageSpecFileName, err) 77 + } 78 + } 79 + 80 + 
func TestResolveImageRejectsPaths(t *testing.T) { 81 + e := testEngine(t, t.TempDir()) 82 + for _, name := range []string{"/etc/passwd", "../evil", "sub/evil", "..", "."} { 83 + if _, _, _, err := e.resolveImage(name); err == nil || !strings.Contains(err.Error(), "must be a plain name") { 84 + t.Fatalf("name %q should be rejected as a path, got: %v", name, err) 85 + } 86 + } 87 + } 88 + 89 + func validImageSpec() ImageSpec { 90 + return ImageSpec{ 91 + Arch: "x86_64", 92 + BootArgs: "console=ttyS0", 93 + Initrd: "initrd", 94 + Kernel: "kernel", 95 + RunnerConfig: RunnerConfig{ 96 + Machine: "microvm", 97 + }, 98 + MemoryMiB: 2048, 99 + Shell: "/bin/sh", 100 + StoreDisk: "store-disk", 101 + VCPUs: 2, 102 + } 103 + } 104 + 105 + func TestImageSpecValidateWithoutBaseConfigHash(t *testing.T) { 106 + spec := validImageSpec() 107 + if err := spec.Validate(); err != nil { 108 + t.Fatalf("non-NixOS image spec should validate: %v", err) 109 + } 110 + if spec.SupportsConfigActivation() { 111 + t.Fatal("spec without baseConfigHash should not support config activation") 112 + } 113 + 114 + spec.BaseConfigHash = "abcdef" 115 + if !spec.SupportsConfigActivation() { 116 + t.Fatal("spec with baseConfigHash should support config activation") 117 + } 118 + } 119 + 120 + func TestImageSpecRequiresShell(t *testing.T) { 121 + spec := validImageSpec() 122 + spec.Shell = "" 123 + err := spec.Validate() 124 + if err == nil || !strings.Contains(err.Error(), "shell") { 125 + t.Fatalf("spec without shell should fail validation, got: %v", err) 126 + } 127 + }
+48
spindle/engines/microvm/models.go
··· 1 + package microvm 2 + 3 + import ( 4 + "fmt" 5 + "slices" 6 + ) 7 + 8 + type manifestWorkflow struct { 9 + Image string `yaml:"image"` 10 + Services map[string]any `yaml:"services"` 11 + Virtualisation map[string]any `yaml:"virtualisation"` 12 + Dependencies []string `yaml:"dependencies"` 13 + Registry map[string]any `yaml:"registry"` 14 + Environment map[string]string `yaml:"environment"` 15 + Caches map[string]string `yaml:"caches"` 16 + Steps []struct { 17 + Name string `yaml:"name"` 18 + Command string `yaml:"command"` 19 + Environment map[string]string `yaml:"environment"` 20 + } `yaml:"steps"` 21 + } 22 + 23 + // flattens the caches map into sorted substituter URLs and trusted public keys 24 + func workflowCaches(caches map[string]string) (urls []string, keys []string, err error) { 25 + for cacheURL, key := range caches { 26 + urls = append(urls, cacheURL) 27 + if key != "" { 28 + keys = append(keys, key) 29 + } 30 + } 31 + if _, err := parseCacheUpstreams(urls); err != nil { 32 + return nil, nil, fmt.Errorf("caches: %w", err) 33 + } 34 + slices.Sort(urls) 35 + slices.Sort(keys) 36 + return urls, keys, nil 37 + } 38 + 39 + type manifestConfig struct { 40 + Services map[string]any `yaml:"services" json:"services,omitempty"` 41 + Virtualisation map[string]any `yaml:"virtualisation" json:"virtualisation,omitempty"` 42 + Dependencies []string `yaml:"dependencies" json:"dependencies,omitempty"` 43 + Registry map[string]any `yaml:"registry" json:"registry,omitempty"` 44 + } 45 + 46 + func (c manifestConfig) Enabled() bool { 47 + return len(c.Services) > 0 || len(c.Virtualisation) > 0 || len(c.Dependencies) > 0 || len(c.Registry) > 0 48 + }
+41
spindle/engines/microvm/models_test.go
··· 1 + package microvm 2 + 3 + import ( 4 + "slices" 5 + "testing" 6 + ) 7 + 8 + func TestWorkflowCaches(t *testing.T) { 9 + urls, keys, err := workflowCaches(map[string]string{ 10 + "https://hydra.nixos.org/": "hydra.nixos.org-1:CNHJZBh9K4tP3EKF6FkkgeVYsS3ohTl+oS0Qa8bezVs=", 11 + "https://cache.garnix.io/": "cache.garnix.io:CTFPyKSLcx5RMJKfLo5EEPUObbA78b0YQ2DTCJXqr9g=", 12 + "https://unsigned.example/": "", 13 + }) 14 + if err != nil { 15 + t.Fatal(err) 16 + } 17 + 18 + wantURLs := []string{ 19 + "https://cache.garnix.io/", 20 + "https://hydra.nixos.org/", 21 + "https://unsigned.example/", 22 + } 23 + if !slices.Equal(urls, wantURLs) { 24 + t.Fatalf("urls: got %v, want %v", urls, wantURLs) 25 + } 26 + wantKeys := []string{ 27 + "cache.garnix.io:CTFPyKSLcx5RMJKfLo5EEPUObbA78b0YQ2DTCJXqr9g=", 28 + "hydra.nixos.org-1:CNHJZBh9K4tP3EKF6FkkgeVYsS3ohTl+oS0Qa8bezVs=", 29 + } 30 + if !slices.Equal(keys, wantKeys) { 31 + t.Fatalf("keys: got %v, want %v", keys, wantKeys) 32 + } 33 + } 34 + 35 + func TestWorkflowCachesRejectsBadURLs(t *testing.T) { 36 + for _, bad := range []string{"ftp://cache.example/", "not a url"} { 37 + if _, _, err := workflowCaches(map[string]string{bad: ""}); err == nil { 38 + t.Errorf("workflowCaches(%q): expected error, got nil", bad) 39 + } 40 + } 41 + }
+25
spindle/engines/microvm/netns_wrapper.sh.tmpl
··· 1 + #!/bin/sh 2 + set -eu 3 + 4 + pid_file="$1"; shift 5 + ip="$1"; shift 6 + mount="$1"; shift 7 + resolv_conf="$1"; shift 8 + 9 + printf '%s\n' "$$" > "$pid_file" 10 + 11 + while ! "$ip" link show {{.TapName}} >/dev/null 2>&1; do 12 + sleep 0.02 13 + done 14 + 15 + "$mount" --bind "$resolv_conf" /etc/resolv.conf 16 + 17 + # fail if we can't add a route, ipv6 is skipped if not enabled on host 18 + for route in{{range .BlockedRoutes}} {{.}}{{end}}; do 19 + case "$route" in 20 + *:*) [ -d /proc/sys/net/ipv6 ] || continue ;; 21 + esac 22 + "$ip" route add blackhole "$route" 23 + done 24 + 25 + exec "$@"
+152
spindle/engines/microvm/networking.go
··· 1 + package microvm 2 + 3 + import ( 4 + "bytes" 5 + "context" 6 + _ "embed" 7 + "fmt" 8 + "log/slog" 9 + "net" 10 + "os" 11 + "os/exec" 12 + "text/template" 13 + ) 14 + 15 + // https://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml 16 + // https://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml 17 + // https://datatracker.ietf.org/doc/rfc6890/ 18 + var blockedNamespaceRoutes = []string{ 19 + "0.0.0.0/8", // unspecified / "this network" addresses 20 + "10.0.0.0/8", // private network 21 + "100.64.0.0/10", // shared carrier-grade nat space 22 + "127.0.0.0/8", // loopback 23 + "169.254.0.0/16", // link-local / autoconfiguration 24 + "172.16.0.0/12", // private network 25 + "192.0.0.0/24", // ietf protocol assignments 26 + "192.0.2.0/24", // documentation / examples 27 + "192.88.99.0/24", // deprecated 6to4 relay anycast 28 + "192.168.0.0/16", // private network 29 + "198.18.0.0/15", // benchmarking / testing 30 + "198.51.100.0/24", // documentation / examples 31 + "203.0.113.0/24", // documentation / examples 32 + "224.0.0.0/4", // multicast 33 + "240.0.0.0/4", // reserved / future use, includes limited broadcast 34 + "::/128", // unspecified address 35 + "::1/128", // loopback 36 + "::ffff:0:0/96", // ipv4-mapped addresses 37 + "64:ff9b::/96", // ipv4/ipv6 translation prefix 38 + "100::/64", // discard-only prefix 39 + "2001::/23", // ietf protocol assignments 40 + "2001:db8::/32", // documentation / examples 41 + "2002::/16", // deprecated 6to4 addressing 42 + "fc00::/7", // unique local addresses 43 + "fe80::/10", // link-local unicast 44 + "ff00::/8", // multicast 45 + } 46 + 47 + var blockedNamespaceNets = func() []*net.IPNet { 48 + nets := make([]*net.IPNet, 0, len(blockedNamespaceRoutes)) 49 + for _, route := range blockedNamespaceRoutes { 50 + _, ipnet, err := net.ParseCIDR(route) 51 + if err != nil { 52 + panic(fmt.Sprintf("parse blocked route %q: %v", route, err)) 53 + } 54 + 
nets = append(nets, ipnet) 55 + } 56 + return nets 57 + }() 58 + 59 + //go:embed netns_wrapper.sh.tmpl 60 + var netnsWrapperTemplate string 61 + 62 + type netnsWrapperData struct { 63 + TapName string 64 + BlockedRoutes []string 65 + } 66 + 67 + func writeNetnsWrapper(path string, dev bool) error { 68 + tmpl, err := template.New("netns-wrapper").Parse(netnsWrapperTemplate) 69 + if err != nil { 70 + return fmt.Errorf("parse qemu network namespace wrapper template: %w", err) 71 + } 72 + 73 + var script bytes.Buffer 74 + 75 + var routes []string 76 + if !dev { 77 + routes = blockedNamespaceRoutes 78 + } 79 + 80 + err = tmpl.Execute(&script, netnsWrapperData{ 81 + TapName: netnsTapName, 82 + BlockedRoutes: routes, 83 + }) 84 + if err != nil { 85 + return fmt.Errorf("render qemu network namespace wrapper template: %w", err) 86 + } 87 + 88 + if err := os.WriteFile(path, script.Bytes(), 0o700); err != nil { 89 + return fmt.Errorf("write qemu network namespace wrapper: %w", err) 90 + } 91 + 92 + return nil 93 + } 94 + 95 + type slirpNamespace struct { 96 + spec ImageSpec 97 + pidFile string 98 + dev bool 99 + } 100 + 101 + func (n *slirpNamespace) Start(ctx context.Context, logFile *os.File, logger *slog.Logger) (*exec.Cmd, *os.File, error) { 102 + pid, err := waitForPIDFile(ctx, n.pidFile) 103 + if err != nil { 104 + return nil, nil, err 105 + } 106 + 107 + exitR, exitW, err := os.Pipe() 108 + if err != nil { 109 + return nil, nil, fmt.Errorf("create slirp4netns exit pipe: %w", err) 110 + } 111 + defer exitR.Close() // always close our read end; child gets it via ExtraFiles dup 112 + 113 + var ok bool 114 + defer func() { 115 + if !ok { 116 + _ = exitW.Close() 117 + } 118 + }() 119 + 120 + slirpPath, err := exec.LookPath("slirp4netns") 121 + if err != nil { 122 + return nil, nil, fmt.Errorf("slirp4netns command not found in PATH: %w", err) 123 + } 124 + 125 + args := []string{ 126 + "--configure", 127 + "--mtu=" + netnsMTU, 128 + } 129 + if !n.dev { 130 + args = 
append(args, "--disable-host-loopback") 131 + } 132 + args = append(args, 133 + "--enable-sandbox", 134 + "--enable-seccomp", 135 + "--exit-fd=3", 136 + "--cidr="+outerSlirpCIDR, 137 + pid, 138 + netnsTapName, 139 + ) 140 + 141 + cmd := exec.CommandContext(ctx, slirpPath, args...) 142 + cmd.ExtraFiles = []*os.File{exitR} 143 + cmd.Stdout = logFile 144 + cmd.Stderr = logFile 145 + if err := cmd.Start(); err != nil { 146 + return nil, nil, fmt.Errorf("start slirp4netns: %w", err) 147 + } 148 + logger.Info("started slirp4netns network namespace", "pid", pid, "cidr", outerSlirpCIDR, "tap", netnsTapName) 149 + 150 + ok = true 151 + return cmd, exitW, nil 152 + }
+100
spindle/engines/microvm/nixos_toplevel_cache.go
··· 1 + package microvm 2 + 3 + import ( 4 + "crypto/sha256" 5 + "database/sql" 6 + "encoding/hex" 7 + "encoding/json" 8 + "errors" 9 + "fmt" 10 + "time" 11 + 12 + "tangled.org/core/spindle/db" 13 + ) 14 + 15 + const nixosToplevelCacheSchemaVersion = 1 16 + 17 + type nixosToplevelCacheRecord struct { 18 + ConfigKey string `json:"config_key"` 19 + Toplevel string `json:"toplevel"` 20 + UpdatedAt time.Time `json:"updated_at"` 21 + } 22 + 23 + type nixosToplevelCacheStore struct { 24 + db *db.DB 25 + } 26 + 27 + func newNixOSToplevelCacheStore(d *db.DB) nixosToplevelCacheStore { 28 + return nixosToplevelCacheStore{db: d} 29 + } 30 + 31 + func (s nixosToplevelCacheStore) Lookup(configKey string) (nixosToplevelCacheRecord, bool, error) { 32 + if s.db == nil { 33 + return nixosToplevelCacheRecord{}, false, nil 34 + } 35 + r, err := s.db.GetNixOSToplevelCacheRecord(configKey) 36 + if err != nil { 37 + if errors.Is(err, sql.ErrNoRows) { 38 + return nixosToplevelCacheRecord{}, false, nil 39 + } 40 + return nixosToplevelCacheRecord{}, false, err 41 + } 42 + return nixosToplevelCacheRecord{ 43 + ConfigKey: r.ConfigKey, 44 + Toplevel: r.Toplevel, 45 + UpdatedAt: r.UpdatedAt, 46 + }, true, nil 47 + } 48 + 49 + func (s nixosToplevelCacheStore) Commit(configKey, toplevel string) error { 50 + if configKey == "" { 51 + return fmt.Errorf("config key is empty") 52 + } 53 + if toplevel == "" { 54 + return fmt.Errorf("config toplevel is empty") 55 + } 56 + if s.db == nil { 57 + return nil 58 + } 59 + return s.db.SaveNixOSToplevelCacheRecord(configKey, toplevel) 60 + } 61 + 62 + func BaseConfigHash(imageSpec ImageSpec) (string, error) { 63 + if imageSpec.BaseConfigHash == "" { 64 + return "", fmt.Errorf("microvm image spec missing baseConfigHash") 65 + } 66 + return imageSpec.BaseConfigHash, nil 67 + } 68 + 69 + func userConfigHash(cfg manifestConfig) string { 70 + data, _ := json.Marshal(cfg) 71 + sum := sha256.Sum256(data) 72 + return hex.EncodeToString(sum[:]) 73 + } 74 + 75 + func 
buildConfigKey(imageSpec ImageSpec, cfg manifestConfig) (string, error) { 76 + baseHash, err := BaseConfigHash(imageSpec) 77 + if err != nil { 78 + return "", err 79 + } 80 + payload := struct { 81 + Schema int `json:"schema"` 82 + BaseConfig string `json:"base_config"` 83 + UserConfig string `json:"user_config"` 84 + }{ 85 + Schema: nixosToplevelCacheSchemaVersion, 86 + BaseConfig: baseHash, 87 + UserConfig: userConfigHash(cfg), 88 + } 89 + data, _ := json.Marshal(payload) 90 + sum := sha256.Sum256(data) 91 + return hex.EncodeToString(sum[:]), nil 92 + } 93 + 94 + func BuildConfigKey(imageSpec ImageSpec, userConfigJSON string) (string, error) { 95 + var cfg manifestConfig 96 + if err := json.Unmarshal([]byte(userConfigJSON), &cfg); err != nil { 97 + return "", err 98 + } 99 + return buildConfigKey(imageSpec, cfg) 100 + }
+676
spindle/engines/microvm/qemu.go
··· 1 + package microvm 2 + 3 + import ( 4 + "context" 5 + _ "embed" 6 + "encoding/json" 7 + "errors" 8 + "fmt" 9 + "log/slog" 10 + "os" 11 + "os/exec" 12 + "path/filepath" 13 + "strconv" 14 + "strings" 15 + "sync" 16 + "time" 17 + 18 + "github.com/digitalocean/go-qemu/qmp" 19 + "github.com/google/uuid" 20 + ) 21 + 22 + const ( 23 + defaultQMPTimeout = 10 * time.Second 24 + outerSlirpCIDR = "10.0.2.0/24" 25 + innerSlirpNet = "10.0.3.0/24" 26 + innerSlirpHost = "10.0.3.2" 27 + innerSlirpDNS = "10.0.3.3" 28 + innerSlirpDHCP = "10.0.3.15" 29 + netnsTapName = "tap0" 30 + netnsMTU = "65520" 31 + ) 32 + 33 + type QEMUConfig struct { 34 + Image ImageSpec 35 + BootTimeout time.Duration 36 + CID uint32 37 + EnableKVM bool 38 + QEMULogPath string 39 + QMPPath string 40 + SerialLogPath string 41 + WorkDir string 42 + VolumePaths map[string]string 43 + VolumeBaseName string 44 + Cgroup CgroupLimits 45 + Dev bool 46 + } 47 + 48 + type QEMUVMHandle struct { 49 + cid uint32 50 + Process *os.Process 51 + qemuLogPath string 52 + QMPMon *qmp.SocketMonitor 53 + QMPPath string 54 + serialLogPath string 55 + workDir string 56 + 57 + cmd *exec.Cmd 58 + done chan struct{} 59 + qemuLogFile *os.File 60 + cgroup *CgroupHandle 61 + slirpCmd *exec.Cmd 62 + slirpExit *os.File 63 + waitErr error 64 + waitErrMu sync.Mutex 65 + } 66 + 67 + type qemuRunner struct{} 68 + 69 + func (qemuRunner) Validate(spec ImageSpec, enableKVM bool) error { 70 + if _, err := exec.LookPath(spec.RunnerCmd()); err != nil { 71 + return fmt.Errorf("required host command %q not found in PATH: %w", spec.RunnerCmd(), err) 72 + } 73 + if _, err := os.Stat("/dev/vhost-vsock"); err != nil { 74 + return fmt.Errorf("microvm requires /dev/vhost-vsock for vhost-vsock-device: %w", err) 75 + } 76 + if enableKVM { 77 + if _, err := os.Stat("/dev/kvm"); err != nil { 78 + return fmt.Errorf("microvm KVM was requested but /dev/kvm is not accessible: %w", err) 79 + } 80 + } 81 + if len(spec.NetworkInterfaces) > 0 { 82 + if _, err := 
os.Stat("/dev/net/tun"); err != nil { 83 + return fmt.Errorf("microvm slirp4netns networking requires /dev/net/tun: %w", err) 84 + } 85 + for _, cmd := range []string{"ip", "mount", "slirp4netns", "unshare"} { 86 + if _, err := exec.LookPath(cmd); err != nil { 87 + return fmt.Errorf("required host command %q not found in PATH: %w", cmd, err) 88 + } 89 + } 90 + } 91 + return nil 92 + } 93 + 94 + func (qemuRunner) Start(ctx context.Context, cfg VMConfig, volumePaths map[string]string, logger *slog.Logger) (VMHandle, error) { 95 + bootTimeout := cfg.BootTimeout 96 + if bootTimeout == 0 { 97 + bootTimeout = 10 * time.Second 98 + } 99 + return StartQEMU(ctx, QEMUConfig{ 100 + Image: cfg.Image, 101 + BootTimeout: bootTimeout, 102 + CID: cfg.CID, 103 + EnableKVM: cfg.EnableKVM, 104 + WorkDir: cfg.WorkDir, 105 + VolumePaths: volumePaths, 106 + Cgroup: cfg.Cgroup, 107 + Dev: cfg.Dev, 108 + }, logger) 109 + } 110 + 111 + func StartQEMU(ctx context.Context, cfg QEMUConfig, logger *slog.Logger) (VMHandle, error) { 112 + if logger == nil { 113 + logger = slog.Default() 114 + } 115 + 116 + workDir := cfg.WorkDir 117 + 118 + handle := &QEMUVMHandle{ 119 + workDir: workDir, 120 + } 121 + 122 + var ok bool 123 + defer func() { 124 + if !ok { 125 + _ = handle.Close() 126 + } 127 + }() 128 + 129 + cid := cfg.CID 130 + if cid == 0 { 131 + var err error 132 + cid, err = AllocateCID() 133 + if err != nil { 134 + return nil, err 135 + } 136 + } 137 + if cid < minGuestCID { 138 + return nil, fmt.Errorf("guest CID must be >= %d", minGuestCID) 139 + } 140 + handle.cid = cid 141 + 142 + volumePaths := cfg.VolumePaths 143 + 144 + qemuLogPath := cfg.QEMULogPath 145 + if qemuLogPath == "" { 146 + qemuLogPath = filepath.Join(workDir, "qemu.log") 147 + } 148 + qemuLogFile, err := createParentedFile(qemuLogPath) 149 + if err != nil { 150 + return nil, err 151 + } 152 + handle.qemuLogPath = qemuLogPath 153 + handle.qemuLogFile = qemuLogFile 154 + 155 + serialLogPath := cfg.SerialLogPath 156 + if 
serialLogPath == "" { 157 + serialLogPath = filepath.Join(workDir, "serial.log") 158 + } 159 + if err := os.MkdirAll(filepath.Dir(serialLogPath), 0o755); err != nil { 160 + return nil, fmt.Errorf("create serial log directory: %w", err) 161 + } 162 + handle.serialLogPath = serialLogPath 163 + 164 + qmpPath := cfg.QMPPath 165 + if qmpPath == "" { 166 + qmpPath = filepath.Join(workDir, "qmp.sock") 167 + } 168 + handle.QMPPath = qmpPath 169 + 170 + qemuCmd := cfg.Image.RunnerCmd() 171 + qemuBinary, err := exec.LookPath(qemuCmd) 172 + if err != nil { 173 + return nil, fmt.Errorf("%s command not found in PATH: %w", qemuCmd, err) 174 + } 175 + 176 + args, err := qemuArgs(qemuArgsConfig{ 177 + Image: cfg.Image, 178 + CID: cid, 179 + EnableKVM: cfg.EnableKVM, 180 + QMPPath: qmpPath, 181 + SerialLogPath: serialLogPath, 182 + VolumePaths: volumePaths, 183 + }) 184 + if err != nil { 185 + return nil, err 186 + } 187 + 188 + cmd, slirpNet, err := qemuCommand(ctx, qemuBinary, args, cfg.Image, workDir, cfg.Dev) 189 + if err != nil { 190 + return nil, err 191 + } 192 + cmd.Env = append(os.Environ(), "TMPDIR="+workDir) 193 + cmd.Stdout = qemuLogFile 194 + cmd.Stderr = qemuLogFile 195 + 196 + cgroup, err := prepareCgroup(cfg.Cgroup, logger) 197 + if err != nil { 198 + return nil, err 199 + } 200 + handle.cgroup = cgroup 201 + 202 + logger.Info("starting qemu microvm", "cid", cid, "workDir", workDir, "serialLog", serialLogPath, "qmp", qmpPath) 203 + if err := cmd.Start(); err != nil { 204 + return nil, fmt.Errorf("starting qemu: %w", err) 205 + } 206 + handle.cmd = cmd 207 + handle.Process = cmd.Process 208 + handle.done = make(chan struct{}) 209 + go func() { 210 + err := cmd.Wait() 211 + handle.waitErrMu.Lock() 212 + handle.waitErr = err 213 + handle.waitErrMu.Unlock() 214 + close(handle.done) 215 + }() 216 + 217 + if err := cgroup.AddProcess(cmd.Process.Pid, logger); err != nil { 218 + return nil, err 219 + } 220 + 221 + if slirpNet != nil { 222 + handle.slirpCmd, 
handle.slirpExit, err = slirpNet.Start(ctx, qemuLogFile, logger) 223 + if err != nil { 224 + return nil, err 225 + } 226 + if handle.slirpCmd != nil && handle.slirpCmd.Process != nil { 227 + if err := cgroup.AddProcess(handle.slirpCmd.Process.Pid, logger); err != nil { 228 + return nil, err 229 + } 230 + } 231 + } 232 + 233 + qmpTimeout := cfg.BootTimeout 234 + if qmpTimeout == 0 { 235 + qmpTimeout = defaultQMPTimeout 236 + } 237 + if err := handle.waitForQMP(ctx, qmpTimeout); err != nil { 238 + return nil, err 239 + } 240 + 241 + status, err := handle.QMPQueryStatus() 242 + if err != nil { 243 + return nil, err 244 + } 245 + if status != "running" { 246 + return nil, fmt.Errorf("qemu guest not running (status: %s)", status) 247 + } 248 + logger.Info("qemu microvm running", "cid", cid, "status", status) 249 + 250 + ok = true 251 + return handle, nil 252 + } 253 + 254 + func (h *QEMUVMHandle) Wait() error { 255 + if h == nil || h.done == nil { 256 + return nil 257 + } 258 + <-h.done 259 + h.waitErrMu.Lock() 260 + defer h.waitErrMu.Unlock() 261 + return h.waitErr 262 + } 263 + 264 + func (h *QEMUVMHandle) WaitContext(ctx context.Context) error { 265 + if h == nil || h.done == nil { 266 + return nil 267 + } 268 + select { 269 + case <-h.done: 270 + h.waitErrMu.Lock() 271 + defer h.waitErrMu.Unlock() 272 + return h.waitErr 273 + case <-ctx.Done(): 274 + return ctx.Err() 275 + } 276 + } 277 + 278 + func (h *QEMUVMHandle) Kill() error { 279 + if h == nil || h.Process == nil { 280 + return nil 281 + } 282 + return h.Process.Kill() 283 + } 284 + 285 + func (h *QEMUVMHandle) Shutdown(ctx context.Context) error { 286 + if h == nil { 287 + return nil 288 + } 289 + if h.QMPMon != nil { 290 + if err := h.QMPSystemPowerdown(); err != nil { 291 + return err 292 + } 293 + } 294 + if h.done == nil { 295 + return nil 296 + } 297 + select { 298 + case <-h.done: 299 + return h.Wait() 300 + case <-ctx.Done(): 301 + _ = h.Kill() 302 + _ = h.Wait() 303 + return ctx.Err() 304 + } 305 + } 
306 + 307 + func (h *QEMUVMHandle) Close() error { 308 + if h == nil { 309 + return nil 310 + } 311 + 312 + var closeErr error 313 + if h.QMPMon != nil { 314 + closeErr = errors.Join(closeErr, h.QMPMon.Disconnect()) 315 + h.QMPMon = nil 316 + } 317 + if h.Process != nil { 318 + _ = h.Process.Kill() 319 + _ = h.Wait() 320 + } 321 + if h.slirpExit != nil { 322 + _ = h.slirpExit.Close() 323 + h.slirpExit = nil 324 + } 325 + if h.slirpCmd != nil && h.slirpCmd.Process != nil { 326 + _ = h.slirpCmd.Process.Kill() 327 + _ = h.slirpCmd.Wait() 328 + h.slirpCmd = nil 329 + } 330 + if h.qemuLogFile != nil { 331 + closeErr = errors.Join(closeErr, h.qemuLogFile.Close()) 332 + h.qemuLogFile = nil 333 + } 334 + if h.cgroup != nil { 335 + closeErr = errors.Join(closeErr, h.cgroup.Close()) 336 + h.cgroup = nil 337 + } 338 + return closeErr 339 + } 340 + 341 + func (h *QEMUVMHandle) QMPRun(command qmp.Command) ([]byte, error) { 342 + if h == nil || h.QMPMon == nil { 343 + return nil, fmt.Errorf("qmp monitor is not connected") 344 + } 345 + data, err := json.Marshal(command) 346 + if err != nil { 347 + return nil, err 348 + } 349 + return h.QMPMon.Run(data) 350 + } 351 + 352 + func (h *QEMUVMHandle) QMPQueryStatus() (string, error) { 353 + raw, err := h.QMPRun(qmp.Command{Execute: "query-status"}) 354 + if err != nil { 355 + return "", fmt.Errorf("qmp query-status failed: %w", err) 356 + } 357 + 358 + var resp struct { 359 + Return struct { 360 + Status string `json:"status"` 361 + } `json:"return"` 362 + } 363 + if err := json.Unmarshal(raw, &resp); err != nil { 364 + return "", fmt.Errorf("qmp query-status parse: %w", err) 365 + } 366 + return resp.Return.Status, nil 367 + } 368 + 369 + func (h *QEMUVMHandle) QMPSystemPowerdown() error { 370 + _, err := h.QMPRun(qmp.Command{Execute: "system_powerdown"}) 371 + return err 372 + } 373 + 374 + func (h *QEMUVMHandle) Logs() VMLogs { 375 + if h == nil { 376 + return VMLogs{} 377 + } 378 + return VMLogs{ 379 + Serial: h.serialLogPath, 380 
+ Extra: map[string]string{ 381 + "qemu": h.qemuLogPath, 382 + }, 383 + } 384 + } 385 + 386 + func (h *QEMUVMHandle) CID() uint32 { 387 + if h == nil { 388 + return 0 389 + } 390 + return h.cid 391 + } 392 + 393 + func (h *QEMUVMHandle) WorkDir() string { 394 + if h == nil { 395 + return "" 396 + } 397 + return h.workDir 398 + } 399 + 400 + func (h *QEMUVMHandle) OOMKilled() bool { 401 + if h == nil { 402 + return false 403 + } 404 + return h.cgroup.OOMKilled() 405 + } 406 + 407 + func (h *QEMUVMHandle) waitForQMP(ctx context.Context, timeout time.Duration) error { 408 + qmpCtx, cancel := context.WithTimeout(ctx, timeout) 409 + defer cancel() 410 + 411 + var lastErr error 412 + for { 413 + mon, err := qmp.NewSocketMonitor("unix", h.QMPPath, 2*time.Second) 414 + if err == nil { 415 + if err = mon.Connect(); err == nil { 416 + h.QMPMon = mon 417 + return nil 418 + } 419 + _ = mon.Disconnect() 420 + } 421 + lastErr = err 422 + 423 + select { 424 + case <-qmpCtx.Done(): 425 + return fmt.Errorf("qmp connect timeout: %w", lastErr) 426 + case <-h.done: 427 + return fmt.Errorf("qemu exited before qmp was ready: %w", h.Wait()) 428 + case <-time.After(25 * time.Millisecond): 429 + } 430 + } 431 + } 432 + 433 + func qemuCommand( 434 + ctx context.Context, 435 + qemuBinary string, 436 + args []string, 437 + spec ImageSpec, 438 + workDir string, 439 + dev bool, 440 + ) (*exec.Cmd, *slirpNamespace, error) { 441 + if len(spec.NetworkInterfaces) == 0 { 442 + return exec.CommandContext(ctx, qemuBinary, args...), nil, nil 443 + } 444 + 445 + ipPath, err := exec.LookPath("ip") 446 + if err != nil { 447 + return nil, nil, fmt.Errorf("ip command not found in PATH: %w", err) 448 + } 449 + mountPath, err := exec.LookPath("mount") 450 + if err != nil { 451 + return nil, nil, fmt.Errorf("mount command not found in PATH: %w", err) 452 + } 453 + unsharePath, err := exec.LookPath("unshare") 454 + if err != nil { 455 + return nil, nil, fmt.Errorf("unshare command not found in PATH: %w", err) 
456 + } 457 + 458 + pidFile, resolvPath, wrapperPath, err := prepareQEMUNetnsFiles(workDir, dev) 459 + if err != nil { 460 + return nil, nil, err 461 + } 462 + 463 + cmdArgs := append([]string{ 464 + "--user", 465 + "--map-root-user", 466 + "--net", 467 + "--mount", 468 + "--propagation", "private", 469 + "--", 470 + wrapperPath, 471 + pidFile, 472 + ipPath, 473 + mountPath, 474 + resolvPath, 475 + qemuBinary, 476 + }, args...) 477 + 478 + cmd := exec.CommandContext(ctx, unsharePath, cmdArgs...) 479 + 480 + return cmd, &slirpNamespace{ 481 + spec: spec, 482 + pidFile: pidFile, 483 + dev: dev, 484 + }, nil 485 + } 486 + 487 + func prepareQEMUNetnsFiles(workDir string, dev bool) (pidFile, resolvPath, wrapperPath string, err error) { 488 + pidFile = filepath.Join(workDir, "qemu-netns.pid") 489 + resolvPath = filepath.Join(workDir, "qemu-netns-resolv.conf") 490 + wrapperPath = filepath.Join(workDir, "qemu-netns-wrapper") 491 + 492 + // the guest resolves through shuttle on 127.0.0.1. keep qemu's slirp DNS 493 + // pointed at an unroutable local resolver inside this network namespace so 494 + // direct guest queries to 10.0.3.3 don't bypass the shuttle dns policy. 
495 + if err := os.WriteFile(resolvPath, []byte("nameserver 127.0.0.1\n"), 0o644); err != nil { 496 + return "", "", "", fmt.Errorf("write qemu network namespace resolv.conf: %w", err) 497 + } 498 + 499 + if err := writeNetnsWrapper(wrapperPath, dev); err != nil { 500 + return "", "", "", fmt.Errorf("write qemu network namespace wrapper: %w", err) 501 + } 502 + 503 + return pidFile, resolvPath, wrapperPath, nil 504 + } 505 + 506 + type qemuArgsConfig struct { 507 + Image ImageSpec 508 + CID uint32 509 + EnableKVM bool 510 + QMPPath string 511 + SerialLogPath string 512 + VolumePaths map[string]string 513 + } 514 + 515 + func qemuArgs(cfg qemuArgsConfig) ([]string, error) { 516 + uuid := uuid.New() 517 + 518 + b := newArgBuilder(64) 519 + 520 + addQEMUMachineArgs(&b, cfg, uuid) 521 + addQEMUStoreArgs(&b, cfg) 522 + 523 + if cfg.EnableKVM { 524 + addQEMUKVMArgs(&b, cfg.Image) 525 + } 526 + 527 + if err := addQEMUVolumeArgs(&b, cfg); err != nil { 528 + return nil, err 529 + } 530 + 531 + if err := addQEMUNetworkArgs(&b, cfg.Image.NetworkInterfaces); err != nil { 532 + return nil, err 533 + } 534 + 535 + b.Optf("-device", "vhost-vsock-device,guest-cid=%d", cfg.CID) 536 + 537 + if len(cfg.Image.RunnerConfig.ExtraArgs) > 0 { 538 + b.Add(cfg.Image.RunnerConfig.ExtraArgs...) 
539 + } 540 + 541 + return b.Args(), nil 542 + } 543 + 544 + func addQEMUMachineArgs(b *argBuilder, cfg qemuArgsConfig, uuid uuid.UUID) { 545 + if cfg.Image.RunnerConfig.Machine != "" { 546 + b.Opt("-M", cfg.Image.RunnerConfig.Machine) 547 + } 548 + b.Optf("-m", "%dM", cfg.Image.MemoryMiB) 549 + b.Opt("-smp", strconv.Itoa(cfg.Image.VCPUs)) 550 + 551 + b.Add( 552 + "-nodefaults", 553 + "-no-user-config", 554 + "-no-reboot", 555 + ) 556 + 557 + b.Opt("-kernel", cfg.Image.Kernel) 558 + b.Opt("-initrd", cfg.Image.Initrd) 559 + 560 + b.Opt("-device", "virtio-rng-device") 561 + 562 + b.Optf("-smbios", "type=1,uuid=%s", uuid) 563 + b.Opt("-serial", "file:"+cfg.SerialLogPath) 564 + 565 + // use virtio console if requsted. this is faster than the serial UART logging 566 + // because serial has a higher cost when being accesssed. we still have to 567 + // support serial itself for early kernel boot but thats OK. 568 + if cfg.Image.RunnerConfig.Console == "hvc0" { 569 + b.Optf("-chardev", "file,id=virtiocon0,path=%s,append=on", cfg.SerialLogPath) 570 + b.Add("-device", "virtio-serial-device") 571 + b.Opt("-device", "virtconsole,chardev=virtiocon0") 572 + } 573 + b.Opt("-display", "none") 574 + b.Opt("-monitor", "none") 575 + b.Opt("-append", cfg.Image.BootArgs) 576 + 577 + b.Opt("-sandbox", "on") 578 + b.Optf("-qmp", "unix:%s,server,nowait", cfg.QMPPath) 579 + } 580 + 581 + func addQEMUStoreArgs(b *argBuilder, cfg qemuArgsConfig) { 582 + drive := newOptionBuilder(8) 583 + drive.KV("id", "store") 584 + drive.KV("format", "raw") 585 + drive.Add("read-only=on") 586 + drive.KV("file", cfg.Image.StoreDisk) 587 + drive.Add("if=none") 588 + drive.Add("aio=io_uring") 589 + 590 + b.Opt("-drive", drive.String()) 591 + b.Opt("-device", "virtio-blk-device,drive=store") 592 + } 593 + 594 + func addQEMUKVMArgs(b *argBuilder, image ImageSpec) { 595 + b.Flag("-enable-kvm") 596 + if image.RunnerConfig.CPU != "" { 597 + b.Opt("-cpu", image.RunnerConfig.CPU) 598 + } 599 + b.Opt("-device", 
"i8042") 600 + } 601 + 602 + func addQEMUVolumeArgs(b *argBuilder, cfg qemuArgsConfig) error { 603 + for index, volume := range cfg.Image.Volumes { 604 + path := cfg.VolumePaths[volume.Image] 605 + if path == "" { 606 + return fmt.Errorf("missing prepared path for volume %q", volume.Image) 607 + } 608 + 609 + driveID := fmt.Sprintf("volume%d", index) 610 + 611 + drive := newOptionBuilder(10) 612 + drive.KV("id", driveID) 613 + drive.KV("format", "raw") 614 + drive.Add("read-only=off") 615 + drive.KV("file", path) 616 + drive.Add("if=none") 617 + drive.Add("aio=io_uring") 618 + drive.Add("discard=unmap") 619 + drive.Add("cache=none") 620 + 621 + b.Opt("-drive", drive.String()) 622 + b.Optf("-device", "virtio-blk-device,drive=%s", driveID) 623 + } 624 + 625 + return nil 626 + } 627 + 628 + func addQEMUNetworkArgs(b *argBuilder, interfaces []NetworkInterface) error { 629 + for _, networkInterface := range interfaces { 630 + if networkInterface.Type != "slirp4netns" { 631 + return fmt.Errorf("unsupported microvm network interface type %q", networkInterface.Type) 632 + } 633 + 634 + netdevOpts := newOptionBuilder(6) 635 + netdevOpts.Add("user") 636 + netdevOpts.KV("id", networkInterface.ID) 637 + netdevOpts.KV("net", innerSlirpNet) 638 + netdevOpts.KV("host", innerSlirpHost) 639 + netdevOpts.KV("dns", innerSlirpDNS) 640 + netdevOpts.KV("dhcpstart", innerSlirpDHCP) 641 + 642 + b.Opt("-netdev", netdevOpts.String()) 643 + b.Optf( 644 + "-device", "virtio-net-device,netdev=%s,mac=%s", 645 + networkInterface.ID, networkInterface.MAC, 646 + ) 647 + } 648 + 649 + return nil 650 + } 651 + 652 + func waitForPIDFile(ctx context.Context, path string) (string, error) { 653 + waitCtx, cancel := context.WithTimeout(ctx, 5*time.Second) 654 + defer cancel() 655 + 656 + ticker := time.NewTicker(25 * time.Millisecond) 657 + defer ticker.Stop() 658 + 659 + for { 660 + data, err := os.ReadFile(path) 661 + if err == nil { 662 + pid := strings.TrimSpace(string(data)) 663 + if pid != "" { 664 
+ return pid, nil 665 + } 666 + } else if !errors.Is(err, os.ErrNotExist) { 667 + return "", fmt.Errorf("read qemu network namespace pid: %w", err) 668 + } 669 + 670 + select { 671 + case <-waitCtx.Done(): 672 + return "", fmt.Errorf("waiting for qemu network namespace pid: %w", waitCtx.Err()) 673 + case <-ticker.C: 674 + } 675 + } 676 + }
+427
spindle/engines/microvm/read_cache_proxy.go
··· 1 + package microvm 2 + 3 + import ( 4 + "context" 5 + "crypto/rand" 6 + "encoding/binary" 7 + "errors" 8 + "fmt" 9 + "io" 10 + "log/slog" 11 + "net" 12 + "net/http" 13 + "net/http/httputil" 14 + "net/url" 15 + "strings" 16 + "sync" 17 + "syscall" 18 + "time" 19 + 20 + "github.com/mdlayher/vsock" 21 + ) 22 + 23 + const ( 24 + readCacheProxyPortMin = 20000 25 + readCacheProxyPortMax = 60000 26 + ) 27 + 28 + type ReadCacheProxy struct { 29 + port uint32 30 + 31 + ln *vsock.Listener 32 + server *http.Server 33 + } 34 + 35 + func StartReadCacheProxy(ctx context.Context, cid uint32, upstreams []CacheUpstream, logger *slog.Logger) (*ReadCacheProxy, error) { 36 + if logger == nil { 37 + logger = slog.Default() 38 + } 39 + logger = logger.With("where", "read_cache", "cid", cid) 40 + 41 + if len(upstreams) == 0 { 42 + return nil, nil 43 + } 44 + 45 + ln, port, err := listenRandomVsockPort(ctx) 46 + if err != nil { 47 + return nil, err 48 + } 49 + 50 + proxy := &ReadCacheProxy{ 51 + port: port, 52 + ln: ln, 53 + } 54 + proxy.server = &http.Server{ 55 + Handler: cacheProxyHandler(upstreams, logger), 56 + Protocols: cacheProxyProtocols(), 57 + ReadHeaderTimeout: 10 * time.Second, 58 + } 59 + 60 + filtered := &cidFilteredVsockListener{ 61 + Listener: ln, 62 + cid: cid, 63 + logger: logger, 64 + } 65 + go func() { 66 + if err := proxy.server.Serve(filtered); err != nil && !errors.Is(err, http.ErrServerClosed) && !errors.Is(err, net.ErrClosed) { 67 + logger.Warn("proxy stopped", "cid", cid, "port", port, "error", err) 68 + } 69 + }() 70 + 71 + logger.Info("started proxy", "cid", cid, "port", port, "upstreams", len(upstreams)) 72 + return proxy, nil 73 + } 74 + 75 + func (p *ReadCacheProxy) Port() uint32 { 76 + if p == nil { 77 + return 0 78 + } 79 + return p.port 80 + } 81 + 82 + func (p *ReadCacheProxy) Close() error { 83 + if p == nil { 84 + return nil 85 + } 86 + 87 + var closeErr error 88 + if p.server != nil { 89 + ctx, cancel := context.WithTimeout(context.Background(), 
time.Second) 90 + closeErr = errors.Join(closeErr, p.server.Shutdown(ctx)) 91 + cancel() 92 + p.server = nil 93 + } 94 + if p.ln != nil { 95 + closeErr = errors.Join(closeErr, p.ln.Close()) 96 + p.ln = nil 97 + } 98 + return closeErr 99 + } 100 + 101 + type cidFilteredVsockListener struct { 102 + *vsock.Listener 103 + cid uint32 104 + logger *slog.Logger 105 + } 106 + 107 + func (l *cidFilteredVsockListener) Accept() (net.Conn, error) { 108 + for { 109 + conn, err := l.Listener.Accept() 110 + if err != nil { 111 + return nil, err 112 + } 113 + 114 + addr, ok := conn.RemoteAddr().(*vsock.Addr) 115 + if ok && addr.ContextID == l.cid { 116 + return conn, nil 117 + } 118 + 119 + l.logger.Warn("dropping proxy connection from unexpected cid", "remote", conn.RemoteAddr(), "expectedCID", l.cid) 120 + _ = conn.Close() 121 + } 122 + } 123 + 124 + func parseCacheUpstreams(raw []string) ([]*url.URL, error) { 125 + upstreams := make([]*url.URL, 0, len(raw)) 126 + seen := make(map[string]struct{}, len(raw)) 127 + for _, value := range raw { 128 + value = strings.TrimSpace(value) 129 + if value == "" { 130 + continue 131 + } 132 + if _, ok := seen[value]; ok { 133 + continue 134 + } 135 + seen[value] = struct{}{} 136 + 137 + parsed, err := url.Parse(value) 138 + if err != nil { 139 + return nil, fmt.Errorf("parse URL %q: %w", value, err) 140 + } 141 + if parsed.Scheme != "http" && parsed.Scheme != "https" { 142 + return nil, fmt.Errorf("URL %q uses unsupported scheme %q", value, parsed.Scheme) 143 + } 144 + if parsed.Host == "" { 145 + return nil, fmt.Errorf("URL %q is missing host", value) 146 + } 147 + upstreams = append(upstreams, parsed) 148 + } 149 + return upstreams, nil 150 + } 151 + 152 + type CacheUpstream struct { 153 + url *url.URL 154 + // guarded upstreams come from the workflow file 155 + // requests to them are refused for special-purpose address ranges 156 + guarded bool 157 + } 158 + 159 + func BuildCacheUpstreams(rawTrusted, rawGuarded []string) 
([]CacheUpstream, error) { 160 + trusted, err := parseCacheUpstreams(rawTrusted) 161 + if err != nil { 162 + return nil, err 163 + } 164 + guarded, err := parseCacheUpstreams(rawGuarded) 165 + if err != nil { 166 + return nil, err 167 + } 168 + return mergeCacheUpstreams(trusted, guarded), nil 169 + } 170 + 171 + func mergeCacheUpstreams(trusted, guarded []*url.URL) []CacheUpstream { 172 + merged := make([]CacheUpstream, 0, len(trusted)+len(guarded)) 173 + seen := make(map[string]struct{}, len(trusted)+len(guarded)) 174 + for _, u := range trusted { 175 + if _, ok := seen[u.String()]; ok { 176 + continue 177 + } 178 + seen[u.String()] = struct{}{} 179 + merged = append(merged, CacheUpstream{url: u}) 180 + } 181 + for _, u := range guarded { 182 + if _, ok := seen[u.String()]; ok { 183 + continue 184 + } 185 + seen[u.String()] = struct{}{} 186 + merged = append(merged, CacheUpstream{url: u, guarded: true}) 187 + } 188 + return merged 189 + } 190 + 191 + func listenRandomVsockPort(ctx context.Context) (*vsock.Listener, uint32, error) { 192 + var lastErr error 193 + for range 32 { 194 + port, err := randomVsockPort() 195 + if err != nil { 196 + return nil, 0, err 197 + } 198 + ln, err := vsock.Listen(port, nil) 199 + if err == nil { 200 + return ln, port, nil 201 + } 202 + lastErr = err 203 + 204 + select { 205 + case <-ctx.Done(): 206 + return nil, 0, ctx.Err() 207 + default: 208 + } 209 + } 210 + return nil, 0, fmt.Errorf("listen on random vsock port: %w", lastErr) 211 + } 212 + 213 + func randomVsockPort() (uint32, error) { 214 + var data [4]byte 215 + if _, err := rand.Read(data[:]); err != nil { 216 + return 0, fmt.Errorf("allocate read vsock port: %w", err) 217 + } 218 + span := uint32(readCacheProxyPortMax - readCacheProxyPortMin) 219 + return readCacheProxyPortMin + binary.BigEndian.Uint32(data[:])%span, nil 220 + } 221 + 222 + var proxyTransport = &http.Transport{ 223 + Proxy: http.ProxyFromEnvironment, 224 + ForceAttemptHTTP2: true, 225 + MaxIdleConns: 100, 
226 + IdleConnTimeout: 90 * time.Second, 227 + TLSHandshakeTimeout: 10 * time.Second, 228 + ExpectContinueTimeout: 1 * time.Second, 229 + } 230 + 231 + // for guarded upstreams, this will refuse requests made to blocked addresses 232 + var guardedProxyTransport = &http.Transport{ 233 + DialContext: (&net.Dialer{ 234 + Timeout: 30 * time.Second, 235 + KeepAlive: 30 * time.Second, 236 + Control: refuseSpecialPurposeAddrs, 237 + }).DialContext, 238 + ForceAttemptHTTP2: true, 239 + MaxIdleConns: 100, 240 + IdleConnTimeout: 90 * time.Second, 241 + TLSHandshakeTimeout: 10 * time.Second, 242 + ExpectContinueTimeout: 1 * time.Second, 243 + } 244 + 245 + // this should run after dns resolution, so it should cover any rebinding tricks 246 + func refuseSpecialPurposeAddrs(network, address string, _ syscall.RawConn) error { 247 + host, _, err := net.SplitHostPort(address) 248 + if err != nil { 249 + return fmt.Errorf("split dial address %q: %w", address, err) 250 + } 251 + ip := net.ParseIP(host) 252 + if ip == nil { 253 + return fmt.Errorf("refusing to dial non-IP address %q", host) 254 + } 255 + for _, ipnet := range blockedNamespaceNets { 256 + if ipnet.Contains(ip) { 257 + return fmt.Errorf("refusing to dial %s: %s is blocked for workflow caches", ip, ipnet) 258 + } 259 + } 260 + return nil 261 + } 262 + 263 + // the proxy is the cache as far as the guest is concerned, so we answer 264 + // /nix-cache-info ourselves instead of racing the upstreams for it. 
merging 265 + // those also doesn't make any sense (none of the options make sense for 266 + // merging) 267 + const nixCacheInfo = "StoreDir: /nix/store\nWantMassQuery: 1\nPriority: 40\n" 268 + 269 + func cacheProxyHandler(upstreams []CacheUpstream, logger *slog.Logger) http.Handler { 270 + proxy := &httputil.ReverseProxy{ 271 + // nothing to do here: the racing transport builds the full URL per 272 + // upstream, it just needs the guest's path/query left intact 273 + Rewrite: func(*httputil.ProxyRequest) {}, 274 + ErrorLog: slog.NewLogLogger(logger.Handler(), slog.LevelError), 275 + Transport: &parallelRacingTransport{ 276 + upstreams: upstreams, 277 + underlying: proxyTransport, 278 + guardedUnderlying: guardedProxyTransport, 279 + logger: logger, 280 + }, 281 + } 282 + 283 + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 284 + if r.URL.Path == "/nix-cache-info" { 285 + w.Header().Set("Content-Type", "text/x-nix-cache-info") 286 + _, _ = io.WriteString(w, nixCacheInfo) 287 + return 288 + } 289 + proxy.ServeHTTP(w, r) 290 + }) 291 + } 292 + 293 + func cacheProxyProtocols() *http.Protocols { 294 + protocols := new(http.Protocols) 295 + protocols.SetHTTP1(true) 296 + protocols.SetUnencryptedHTTP2(true) 297 + return protocols 298 + } 299 + 300 + func mergeQuery(base, extra string) string { 301 + switch { 302 + case base == "": 303 + return extra 304 + case extra == "": 305 + return base 306 + default: 307 + return base + "&" + extra 308 + } 309 + } 310 + 311 + type parallelRacingTransport struct { 312 + upstreams []CacheUpstream 313 + underlying http.RoundTripper 314 + guardedUnderlying http.RoundTripper 315 + logger *slog.Logger 316 + } 317 + 318 + func (t *parallelRacingTransport) RoundTrip(req *http.Request) (*http.Response, error) { 319 + type result struct { 320 + resp *http.Response 321 + err error 322 + is404 bool 323 + idx int 324 + } 325 + 326 + resCh := make(chan result, len(t.upstreams)) 327 + cancels := make([]context.CancelFunc, 
len(t.upstreams)) 328 + var wg sync.WaitGroup 329 + 330 + for i, upstream := range t.upstreams { 331 + wg.Add(1) 332 + ctx, cancel := context.WithCancel(req.Context()) 333 + cancels[i] = cancel 334 + 335 + go func(idx int, target CacheUpstream, uCtx context.Context) { 336 + defer wg.Done() 337 + 338 + raceReq := req.Clone(uCtx) 339 + // rewrite to the target, joining the upstream's base path/query 340 + // with what the guest asked for 341 + raceReq.URL.Scheme = target.url.Scheme 342 + raceReq.URL.Host = target.url.Host 343 + raceReq.URL.Path = strings.TrimSuffix(target.url.Path, "/") + req.URL.Path 344 + raceReq.URL.RawQuery = mergeQuery(target.url.RawQuery, req.URL.RawQuery) 345 + // Host wins over URL.Host for the outgoing Host header, and the 346 + // reverse proxy preserves the guest's (127.0.0.1:<port>), which 347 + // host-routed upstreams like fastly reject with a 421 348 + raceReq.Host = target.url.Host 349 + // the transport doesn't turn URL userinfo into basic auth, only 350 + // http.Client does, so do it ourselves 351 + if user := target.url.User; user != nil { 352 + password, _ := user.Password() 353 + raceReq.SetBasicAuth(user.Username(), password) 354 + } 355 + 356 + rt := t.underlying 357 + if target.guarded { 358 + rt = t.guardedUnderlying 359 + } 360 + resp, err := rt.RoundTrip(raceReq) 361 + if err != nil { 362 + resCh <- result{err: err, idx: idx} 363 + return 364 + } 365 + if resp.StatusCode == http.StatusNotFound { 366 + _ = resp.Body.Close() // don't care about the body of a 404 367 + resCh <- result{is404: true, idx: idx} 368 + return 369 + } 370 + if resp.StatusCode >= 400 { 371 + // an erroring upstream must not win over a healthy one 372 + _ = resp.Body.Close() 373 + resCh <- result{err: fmt.Errorf("upstream returned status %d", resp.StatusCode), idx: idx} 374 + return 375 + } 376 + // yay, ok 377 + resCh <- result{resp: resp, idx: idx} 378 + }(i, upstream, ctx) 379 + } 380 + 381 + go func() { 382 + wg.Wait() 383 + close(resCh) 384 + }() 
385 + 386 + var total404s int 387 + for res := range resCh { 388 + if res.is404 { 389 + total404s++ 390 + if total404s == len(t.upstreams) { 391 + for _, cancel := range cancels { 392 + cancel() 393 + } 394 + return &http.Response{ 395 + StatusCode: http.StatusNotFound, 396 + Body: io.NopCloser(strings.NewReader("404 nix path not found")), 397 + Header: make(http.Header), 398 + Request: req, 399 + }, nil 400 + } 401 + continue 402 + } 403 + 404 + if res.err != nil { 405 + if !errors.Is(res.err, context.Canceled) { 406 + t.logger.Warn("upstream failed", 407 + "path", req.URL.Path, 408 + "error", res.err, 409 + ) 410 + } 411 + continue 412 + } 413 + 414 + // cancel other requests 415 + for i, cancel := range cancels { 416 + if i != res.idx { 417 + cancel() 418 + } 419 + } 420 + return res.resp, nil 421 + } 422 + 423 + for _, cancel := range cancels { 424 + cancel() 425 + } 426 + return nil, errors.New("all upstreams failed or timed out") 427 + }
+171
spindle/engines/microvm/read_cache_proxy_test.go
··· 1 + package microvm 2 + 3 + import ( 4 + "io" 5 + "log/slog" 6 + "net/http" 7 + "net/http/httptest" 8 + "strings" 9 + "testing" 10 + "time" 11 + ) 12 + 13 + func TestCacheProxyFallsBackOnNotFound(t *testing.T) { 14 + first := httptest.NewServer(http.NotFoundHandler()) 15 + defer first.Close() 16 + second := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { 17 + if req.URL.Path != "/abc.narinfo" { 18 + t.Fatalf("path: got %q, want /abc.narinfo", req.URL.Path) 19 + } 20 + _, _ = io.WriteString(w, "ok") 21 + })) 22 + defer second.Close() 23 + 24 + upstreams, err := parseCacheUpstreams([]string{first.URL, second.URL}) 25 + if err != nil { 26 + t.Fatal(err) 27 + } 28 + 29 + req := httptest.NewRequest(http.MethodGet, "http://guest/abc.narinfo", nil) 30 + rec := httptest.NewRecorder() 31 + cacheProxyHandler(mergeCacheUpstreams(upstreams, nil), slog.Default()).ServeHTTP(rec, req) 32 + 33 + if rec.Code != http.StatusOK { 34 + t.Fatalf("status: got %d, want 200; body=%q", rec.Code, rec.Body.String()) 35 + } 36 + if got := rec.Body.String(); got != "ok" { 37 + t.Fatalf("body: got %q, want ok", got) 38 + } 39 + } 40 + 41 + func TestCacheProxyServesNixCacheInfoItself(t *testing.T) { 42 + upstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { 43 + t.Errorf("upstream should not be hit, got request for %q", req.URL.Path) 44 + })) 45 + defer upstream.Close() 46 + 47 + upstreams, err := parseCacheUpstreams([]string{upstream.URL}) 48 + if err != nil { 49 + t.Fatal(err) 50 + } 51 + 52 + req := httptest.NewRequest(http.MethodGet, "http://guest/nix-cache-info", nil) 53 + rec := httptest.NewRecorder() 54 + cacheProxyHandler(mergeCacheUpstreams(upstreams, nil), slog.Default()).ServeHTTP(rec, req) 55 + 56 + if rec.Code != http.StatusOK { 57 + t.Fatalf("status: got %d, want 200; body=%q", rec.Code, rec.Body.String()) 58 + } 59 + if got := rec.Body.String(); got != nixCacheInfo { 60 + t.Fatalf("body: got %q, 
want %q", got, nixCacheInfo) 61 + } 62 + } 63 + 64 + func TestCacheProxyErrorStatusDoesNotWinRace(t *testing.T) { 65 + erroring := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { 66 + http.Error(w, "misdirected", http.StatusMisdirectedRequest) 67 + })) 68 + defer erroring.Close() 69 + healthy := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { 70 + time.Sleep(50 * time.Millisecond) // lose the race to the erroring upstream 71 + _, _ = io.WriteString(w, "ok") 72 + })) 73 + defer healthy.Close() 74 + 75 + upstreams, err := parseCacheUpstreams([]string{erroring.URL, healthy.URL}) 76 + if err != nil { 77 + t.Fatal(err) 78 + } 79 + 80 + req := httptest.NewRequest(http.MethodGet, "http://guest/abc.narinfo", nil) 81 + rec := httptest.NewRecorder() 82 + cacheProxyHandler(mergeCacheUpstreams(upstreams, nil), slog.Default()).ServeHTTP(rec, req) 83 + 84 + if rec.Code != http.StatusOK { 85 + t.Fatalf("status: got %d, want 200; body=%q", rec.Code, rec.Body.String()) 86 + } 87 + if got := rec.Body.String(); got != "ok" { 88 + t.Fatalf("body: got %q, want ok", got) 89 + } 90 + } 91 + 92 + func TestCacheProxyJoinsSubpathQueryAndAuth(t *testing.T) { 93 + upstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { 94 + if req.URL.Path != "/sub/cache/abc.narinfo" { 95 + t.Errorf("path: got %q, want /sub/cache/abc.narinfo", req.URL.Path) 96 + } 97 + if got := req.URL.Query().Get("token"); got != "s3cret" { 98 + t.Errorf("token: got %q, want s3cret", got) 99 + } 100 + if user, pass, ok := req.BasicAuth(); !ok || user != "dawn" || pass != "woof" { 101 + t.Errorf("basic auth: got %q/%q/%v, want dawn/woof/true", user, pass, ok) 102 + } 103 + _, _ = io.WriteString(w, "ok") 104 + })) 105 + defer upstream.Close() 106 + 107 + upstreamURL := "http://dawn:woof@" + strings.TrimPrefix(upstream.URL, "http://") + "/sub/cache/?token=s3cret" 108 + upstreams, err := 
parseCacheUpstreams([]string{upstreamURL}) 109 + if err != nil { 110 + t.Fatal(err) 111 + } 112 + 113 + req := httptest.NewRequest(http.MethodGet, "http://guest/abc.narinfo", nil) 114 + rec := httptest.NewRecorder() 115 + cacheProxyHandler(mergeCacheUpstreams(upstreams, nil), slog.Default()).ServeHTTP(rec, req) 116 + 117 + if rec.Code != http.StatusOK { 118 + t.Fatalf("status: got %d, want 200; body=%q", rec.Code, rec.Body.String()) 119 + } 120 + if got := rec.Body.String(); got != "ok" { 121 + t.Fatalf("body: got %q, want ok", got) 122 + } 123 + } 124 + 125 + func TestCacheProxyGuardedUpstreamCannotReachBlockedRanges(t *testing.T) { 126 + // httptest listens on 127.0.0.1, which is in the blocked ranges; reaching 127 + // it would mean a workflow-defined cache can hit the host's loopback 128 + upstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { 129 + t.Errorf("guarded upstream on loopback should not be reachable, got request for %q", req.URL.Path) 130 + })) 131 + defer upstream.Close() 132 + 133 + upstreams, err := parseCacheUpstreams([]string{upstream.URL}) 134 + if err != nil { 135 + t.Fatal(err) 136 + } 137 + 138 + req := httptest.NewRequest(http.MethodGet, "http://guest/abc.narinfo", nil) 139 + rec := httptest.NewRecorder() 140 + cacheProxyHandler(mergeCacheUpstreams(nil, upstreams), slog.Default()).ServeHTTP(rec, req) 141 + 142 + if rec.Code != http.StatusBadGateway { 143 + t.Fatalf("status: got %d, want 502; body=%q", rec.Code, rec.Body.String()) 144 + } 145 + } 146 + 147 + func TestCacheProxyRewritesHostHeader(t *testing.T) { 148 + var upstreamHost string 149 + upstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { 150 + if req.Host != upstreamHost { 151 + t.Errorf("host: got %q, want %q", req.Host, upstreamHost) 152 + } 153 + _, _ = io.WriteString(w, "ok") 154 + })) 155 + defer upstream.Close() 156 + upstreamHost = strings.TrimPrefix(upstream.URL, "http://") 157 + 158 + 
upstreams, err := parseCacheUpstreams([]string{upstream.URL}) 159 + if err != nil { 160 + t.Fatal(err) 161 + } 162 + 163 + req := httptest.NewRequest(http.MethodGet, "http://127.0.0.1:10500/abc.narinfo", nil) 164 + req.Host = "127.0.0.1:10500" 165 + rec := httptest.NewRecorder() 166 + cacheProxyHandler(mergeCacheUpstreams(upstreams, nil), slog.Default()).ServeHTTP(rec, req) 167 + 168 + if rec.Code != http.StatusOK { 169 + t.Fatalf("status: got %d, want 200; body=%q", rec.Code, rec.Body.String()) 170 + } 171 + }
+24
spindle/engines/microvm/runner.go
··· 1 + package microvm 2 + 3 + import ( 4 + "context" 5 + "fmt" 6 + "log/slog" 7 + ) 8 + 9 + type Runner interface { 10 + // check the host has what this backend needs for spec. 11 + Validate(spec ImageSpec, enableKVM bool) error 12 + Start(ctx context.Context, cfg VMConfig, volumePaths map[string]string, logger *slog.Logger) (VMHandle, error) 13 + } 14 + 15 + func runnerFor(runnerType string) (Runner, error) { 16 + switch runnerType { 17 + case "qemu", "": 18 + return qemuRunner{}, nil 19 + case "firecracker": 20 + return nil, fmt.Errorf("runner type %q not implemented yet", runnerType) 21 + default: 22 + return nil, fmt.Errorf("unsupported runner type %q", runnerType) 23 + } 24 + }
+86
spindle/engines/microvm/start-test-cache.sh
#!/usr/bin/env bash
set -euo pipefail

# start a local ncps binary cache
# usage: ./start-test-cache.sh <test-dir> [port]
#
# generates a signing key pair, initializes the ncps sqlite db, writes an ncps
# config under <test-dir>, starts `ncps serve` in the background and writes
# <test-dir>/env.sh with the CACHE_* variables for the caller to source.
# exits non-zero if ncps dies or never answers on its port.

if [ "$#" -lt 1 ]; then
  echo "Usage: $0 <test-dir> [ncps-port]" >&2
  exit 1
fi

TEST_DIR="$(mkdir -p "$1" && cd "$1" && pwd)"
PORT="${2:-8501}"

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"

SECRET_KEY_PATH="$TEST_DIR/test-cache-key.secret"
PUBLIC_KEY_PATH="$TEST_DIR/test-cache-key.pub"
DB_PATH="$TEST_DIR/ncps.sqlite"
CONFIG_PATH="$TEST_DIR/ncps-config.yaml"
STORAGE_DIR="$TEST_DIR/storage"
ENV_PATH="$TEST_DIR/env.sh"
PID_PATH="$TEST_DIR/ncps.pid"

mkdir -p "$STORAGE_DIR"

echo "generating binary cache keys.."
nix-store --generate-binary-cache-key test-cache-key "$SECRET_KEY_PATH" "$PUBLIC_KEY_PATH"
PUBKEY_VAL=$(cat "$PUBLIC_KEY_PATH")

echo "initializing ncps db..."
nix shell nixpkgs#dbmate --command dbmate \
  --migrations-dir "$(nix build --no-link --print-out-paths nixpkgs#ncps)/share/ncps/db/migrations/sqlite" \
  -u "sqlite:$DB_PATH" \
  up

echo "writing ncps configuration..."
# NOTE(review): the key nesting below mirrors ncps's --cache-* / --server-* flag
# layout (storage and upstream under cache, addr under server); confirm against
# the ncps config reference.
cat <<EOF > "$CONFIG_PATH"
cache:
  allow-delete-verb: true
  allow-put-verb: true
  hostname: "cache.local"
  database-url: "sqlite:$DB_PATH"
  secret-key-path: "$SECRET_KEY_PATH"
  sign-narinfo: true
  storage:
    local: "$STORAGE_DIR"
  upstream:
    urls:
      - https://cache.nixos.org
    public-keys:
      - cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY=
server:
  addr: "127.0.0.1:$PORT"
EOF

echo "starting ncps on port $PORT..."
export CACHE_ALLOW_PUT_VERB=true
nix shell nixpkgs#ncps --command ncps serve --config "$CONFIG_PATH" &
NCPS_PID=$!
echo "$NCPS_PID" > "$PID_PATH"

# wait for connection: poll the cache info endpoint, bailing out early if the
# server process dies.
healthy=0
for _ in {1..30}; do
  if curl -s "http://127.0.0.1:$PORT/nix-cache-info" > /dev/null; then
    echo "ncps is healthy."
    healthy=1
    break
  fi
  sleep 0.5
  if ! kill -0 "$NCPS_PID" 2>/dev/null; then
    echo "ncps exited unexpectedly during startup." >&2
    exit 1
  fi
done

# the loop can also end because every attempt failed while ncps stayed alive;
# that is a startup failure too, so do not hand callers an env.sh for a cache
# that never answered.
if [ "$healthy" -ne 1 ]; then
  echo "ncps did not answer on port $PORT after 30 attempts." >&2
  kill "$NCPS_PID" 2>/dev/null || true
  exit 1
fi

cat <<EOF > "$ENV_PATH"
export CACHE_PUBKEY="$PUBKEY_VAL"
export CACHE_PORT="$PORT"
export CACHE_URL="http://127.0.0.1:$PORT"
export CACHE_UPLOAD_URL="http://127.0.0.1:$PORT/upload"
export CACHE_SECRET_KEY_PATH="$SECRET_KEY_PATH"
export NCPS_PID="$NCPS_PID"
export TEST_DIR="$TEST_DIR"
EOF

echo "cache server started successfully. source $ENV_PATH to use, and kill PID $NCPS_PID or check $PID_PATH to stop it."
+850
spindle/engines/microvm/test-spindle-microvm.sh
#!/usr/bin/env bash
set -euo pipefail
# note: needs `sudo modprobe vhost_vsock`!
#
# end-to-end tests for spindle-microvm-run: builds the runner and the guest
# images, starts a local ncps cache, then runs every test_* function listed in
# TESTS in its own background job (at most $JOBS at a time).
# usage: test-spindle-microvm.sh [-j N|--jobs N] [--only TEST]

log() {
  printf "\n\033[1;36m>>> %s\033[0m\n" "$*"
}

# strips ANSI escape sequences from the given files (or stdin when none given).
strip_ansi() {
  local esc
  esc=$(printf '\033')
  sed -E "s/${esc}\[[0-9;]*[a-zA-Z]//g; s/${esc}\([a-zA-Z]//g" "$@"
}

declare -a TEST_NAMES=()
declare -a TEST_STATUSES=()
declare -a TEST_TIMES=()

# prints the current time in milliseconds, from EPOCHREALTIME when it has a
# fractional part and from whole seconds otherwise.
get_time_ms() {
  local t="${EPOCHREALTIME:-}"
  if [[ "$t" == *.* ]]; then
    local secs="${t%.*}"
    local subs="${t#*.}"
    subs="${subs:0:3}"
    while [ "${#subs}" -lt 3 ]; do
      subs="${subs}0"
    done
    echo "${secs}${subs}"
  else
    echo "$(date +%s)000"
  fi
}

# formats a millisecond count as seconds with three decimals, e.g. 1.234s.
format_duration() {
  local ms=$1
  local secs=$((ms / 1000))
  local rem=$((ms % 1000))
  printf "%d.%03ds" "$secs" "$rem"
}

# prints one line per recorded test plus pass/fail totals; prints nothing when
# no test results were recorded.
print_summary() {
  if [ "${#TEST_NAMES[@]}" -eq 0 ]; then
    return
  fi
  printf "\n"
  log "test summary"
  echo "========================================="
  local passed_count=0
  local failed_count=0
  local total_time=0
  local i
  for i in "${!TEST_NAMES[@]}"; do
    local name="${TEST_NAMES[$i]}"
    local status="${TEST_STATUSES[$i]}"
    local duration_ms="${TEST_TIMES[$i]}"
    local duration_str
    duration_str=$(format_duration "$duration_ms")

    local status_color="\033[0;32m"
    if [ "$status" = "Failed" ]; then
      status_color="\033[0;31m"
      failed_count=$((failed_count + 1))
    else
      passed_count=$((passed_count + 1))
    fi
    total_time=$((total_time + duration_ms))

    printf " %-30s %b%-8b\033[0m %s\n" "$name" "$status_color" "$status" "$duration_str"
  done
  echo "-----------------------------------------"
  local total_tests="${#TEST_NAMES[@]}"
  local total_time_str
  total_time_str=$(format_duration "$total_time")
  printf " total: %d tests, %d passed, %d failed\n" "$total_tests" "$passed_count" "$failed_count"
  printf " total execution time: %s\n" "$total_time_str"
  echo "========================================="
}

usage() {
  echo "usage: $0 [-j N|--jobs N] [--only TEST]" >&2
}

# need_value OPTION ARGC: fails with a usage error when OPTION is the last
# argument, instead of tripping `set -u` on an unbound $2.
need_value() {
  if [ "$2" -lt 2 ]; then
    echo "error: $1 requires a value" >&2
    usage
    exit 1
  fi
}

JOBS="${JOBS:-4}"
while [[ $# -gt 0 ]]; do
  case "$1" in
    -j | --jobs)
      need_value "$1" "$#"
      JOBS="$2"
      shift 2
      ;;
    --jobs=*)
      JOBS="${1#*=}"
      shift
      ;;
    --only)
      need_value "$1" "$#"
      TEST_ONLY="$2"
      shift 2
      ;;
    --only=*)
      TEST_ONLY="${1#*=}"
      shift
      ;;
    *)
      echo "unknown argument: $1" >&2
      usage
      exit 1
      ;;
  esac
done
if ! [[ "$JOBS" =~ ^[0-9]+$ ]] || [ "$JOBS" -lt 1 ]; then
  echo "error: --jobs must be a positive integer (got '$JOBS')" >&2
  exit 1
fi

# prints a random port in 20000-36383 that nothing on 127.0.0.1 accepts
# connections on, trying up to 50 candidates.
pick_free_port() {
  local port
  for _ in $(seq 1 50); do
    port=$(((RANDOM % 16384) + 20000))
    if ! (exec 3<>"/dev/tcp/127.0.0.1/$port") 2>/dev/null; then
      echo "$port"
      return 0
    fi
  done
  echo "error: could not find a free port for the cache" >&2
  return 1
}

# stops the cache (via its pid file) and every process whose command line
# mentions $TEMP_DIR: TERM first, then KILL after up to ~5s.
kill_temp_dir_procs() {
  if [ -f "$TEMP_DIR/ncps.pid" ]; then
    kill "$(cat "$TEMP_DIR/ncps.pid")" 2>/dev/null || true
  fi
  pkill -TERM -f "$TEMP_DIR" 2>/dev/null || true
  local i
  for i in $(seq 1 20); do
    pgrep -f "$TEMP_DIR" >/dev/null 2>&1 || break
    sleep 0.25
  done
  pkill -KILL -f "$TEMP_DIR" 2>/dev/null || true
}

# copies the top-level files of $TEMP_DIR and the (ANSI-stripped) per-VM logs
# to /tmp/test-spindle-microvm-logs.
collect_logs() {
  echo "test failed. copying logs to /tmp/test-spindle-microvm-logs"
  mkdir -p /tmp/test-spindle-microvm-logs
  local f
  for f in "$TEMP_DIR"/*; do
    [ -f "$f" ] && cp "$f" /tmp/test-spindle-microvm-logs/
  done
  local work logf
  for work in "$TEMP_DIR"/work-*; do
    [ -d "$work" ] || continue
    for logf in "$work"/*.log; do
      [ -f "$logf" ] || continue
      strip_ansi "$logf" > "/tmp/test-spindle-microvm-logs/$(basename "$work")-$(basename "$logf")"
    done
  done
}

SUCCESS=0
CLEANED=0
cleanup() {
  [ "$CLEANED" -eq 1 ] && return
  CLEANED=1

  print_summary
  log "cleaning up..."

  local jobs_pids
  jobs_pids=$(jobs -p)
  [ -n "$jobs_pids" ] && kill $jobs_pids 2>/dev/null || true

  kill_temp_dir_procs

  [ "$SUCCESS" -ne 1 ] && collect_logs

  chmod -R +w "$TEMP_DIR" 2>/dev/null || true
  rm -rf "$TEMP_DIR"
  echo "done"
}

rm -rf /tmp/test-spindle-microvm-logs

log "setup local cache & temp environment"
TEMP_DIR=$(mktemp -d -t test-spindle-microvm-XXXXXX)

# install the traps as soon as TEMP_DIR exists, so a failed or interrupted
# build below does not leave the temp dir (and the extracted images) behind.
trap cleanup EXIT
# route signals through the EXIT trap so an interrupt still tears down VMs.
trap 'exit 130' INT
trap 'exit 143' TERM

log "build spindle & microvm image tarball"
nix develop --command go build -o spindle/spindle-microvm-run ./cmd/spindle-microvm-run
TARBALL_PATH=$(nix build .#spindle-nixos-image-tarball --no-link --print-out-paths)
mkdir -p "$TEMP_DIR/image"
tar -C "$TEMP_DIR/image" -xzf "$TARBALL_PATH"
IMAGE_SPEC_JSON="$TEMP_DIR/image/spec.json"

log "build alpine microvm image tarball"
ALPINE_TARBALL_PATH=$(nix build .#spindle-alpine-image-tarball --no-link --print-out-paths)
mkdir -p "$TEMP_DIR/alpine-image"
tar -C "$TEMP_DIR/alpine-image" -xzf "$ALPINE_TARBALL_PATH"
ALPINE_IMAGE_SPEC_JSON="$TEMP_DIR/alpine-image/spec.json"

CACHE_PORT=$(pick_free_port)
./spindle/engines/microvm/start-test-cache.sh "$TEMP_DIR" "$CACHE_PORT"
source "$TEMP_DIR/env.sh"

# run_vm [options] -- COMMAND...
# runs COMMAND in a guest through spindle-microvm-run and prints the runner's
# combined output. on failure the output plus the serial and qemu logs go to
# stderr and the function returns 1.
#   --spec PATH      image spec json (default: the nixos image)
#   --name NAME      suffix of the per-VM work dir
#   --timeout DUR    exec timeout (default 60s)
#   --upload         pass the cache upload url
#   --activate JSON  pass an activation config
#   --no-cache       do not pass the read cache url / public key
#   --db PATH        pass a db path
run_vm() {
  local name=""
  local timeout="60s"
  local upload=0
  local activate=""
  local no_cache=0
  local db=""
  local spec="$IMAGE_SPEC_JSON"

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --spec)
        spec="$2"
        shift 2
        ;;
      --name)
        name="$2"
        shift 2
        ;;
      --timeout)
        timeout="$2"
        shift 2
        ;;
      --upload)
        upload=1
        shift
        ;;
      --activate)
        activate="$2"
        shift 2
        ;;
      --no-cache)
        no_cache=1
        shift
        ;;
      --db)
        db="$2"
        shift 2
        ;;
      --)
        shift
        break
        ;;
      *)
        echo "unknown argument: $1" >&2
        exit 1
        ;;
    esac
  done

  local work_dir="$TEMP_DIR/work-${name}"
  mkdir -p "$work_dir"

  local args=(
    --image-spec "$spec"
    --work-dir "$work_dir"
    --exec-timeout "$timeout"
    --port "${SPINDLE_TEST_VSOCK_PORT:-10240}"
    --memory-mib 2049
  )

  if [ "$no_cache" -eq 0 ]; then
    args+=(
      --cache-read-url "$CACHE_URL"
      --cache-trusted-public-key "$CACHE_PUBKEY"
    )
  fi

  if [ "$upload" -eq 1 ]; then
    args+=(
      --cache-upload-url "$CACHE_UPLOAD_URL?secret-key=$CACHE_SECRET_KEY_PATH"
    )
  fi

  if [ -n "$activate" ]; then
    args+=(
      --activate-config "$activate"
    )
  fi

  if [ -n "$db" ]; then
    args+=(
      --db "$db"
    )
  fi

  local out
  if ! out=$(spindle/spindle-microvm-run "${args[@]}" -- "$@" 2>&1); then
    echo "$out" | strip_ansi >&2
    strip_ansi "$work_dir/serial.log" >&2
    strip_ansi "$work_dir/qemu.log" >&2
    return 1
  fi
  echo "$out"
}

# run_test_job NAME FUNC PORT: runs one test function with its own vsock port,
# captures its output in a per-test log and records "status<TAB>duration_ms" in
# a per-test status file for run_tests to aggregate.
run_test_job() {
  local name="$1"
  local func="$2"
  local port="$3"
  export SPINDLE_TEST_VSOCK_PORT="$port"

  local logfile="$TEMP_DIR/test-${name}.log"
  local start
  start=$(get_time_ms)
  log "[$name] start (vsock port $port)"

  # note: errexit is ignored inside the function called here (it runs under
  # `if !`), so test functions must guard their own commands with `|| return 1`.
  local status="Passed"
  if ! "$func" > "$logfile" 2>&1; then
    status="Failed"
  fi

  local duration_ms=$(($(get_time_ms) - start))
  printf '%s\t%s\n' "$status" "$duration_ms" > "$TEMP_DIR/test-${name}.status"

  local duration_str
  duration_str=$(format_duration "$duration_ms")
  if [ "$status" = "Failed" ]; then
    printf "\n\033[0;31m>>> [%s] FAILED (%s)\033[0m\n" "$name" "$duration_str"
    strip_ansi "$logfile" || true
  else
    printf "\n\033[0;32m>>> [%s] passed (%s)\033[0m\n" "$name" "$duration_str"
  fi
}

# schedules every selected test across at most $JOBS concurrent VMs (each on its
# own vsock port), then aggregates the per-test status files into the summary
# arrays. returns 1 if any test failed or if --only selected no test at all,
# 0 otherwise.
run_tests() {
  local base_port=10240
  local idx=0
  local running=0
  local func name

  for func in "${TESTS[@]}"; do
    name="${func#test_}"
    name="${name//_/-}"
    if [ -n "${TEST_ONLY:-}" ] && [ "${TEST_ONLY}" != "$name" ]; then
      continue
    fi

    run_test_job "$name" "$func" "$((base_port + idx))" &
    idx=$((idx + 1))
    running=$((running + 1))

    if [ "$running" -ge "$JOBS" ]; then
      wait -n || true
      running=$((running - 1))
    fi
  done

  # a filter that matches nothing must not count as a green run.
  if [ "$idx" -eq 0 ]; then
    echo "error: no test matches --only '${TEST_ONLY:-}'; available tests:" >&2
    for func in "${TESTS[@]}"; do
      name="${func#test_}"
      echo "  ${name//_/-}" >&2
    done
    return 1
  fi
  wait

  local failed=0
  for func in "${TESTS[@]}"; do
    name="${func#test_}"
    name="${name//_/-}"
    if [ -n "${TEST_ONLY:-}" ] && [ "${TEST_ONLY}" != "$name" ]; then
      continue
    fi

    # a missing status file means the job died before recording a result.
    local statusfile="$TEMP_DIR/test-${name}.status"
    if [ ! -f "$statusfile" ]; then
      TEST_NAMES+=("$name")
      TEST_STATUSES+=("Failed")
      TEST_TIMES+=(0)
      failed=1
      continue
    fi

    local status duration_ms
    IFS=$'\t' read -r status duration_ms < "$statusfile"
    TEST_NAMES+=("$name")
    TEST_STATUSES+=("$status")
    TEST_TIMES+=("$duration_ms")
    if [ "$status" = "Failed" ]; then
      failed=1
    fi
  done

  return "$failed"
}

test_realize() {
  local test_store_path
  test_store_path=$(nix-build -E 'with import <nixpkgs> {}; writeText "test-file" "hello from cache"' --no-out-link) || return 1
  nix copy --to "$CACHE_UPLOAD_URL?secret-key=$CACHE_SECRET_KEY_PATH" "$test_store_path" || return 1

  local out
  out=$(run_vm --name "realize" --timeout "60s" -- /run/current-system/sw/bin/bash -lc '
    set -euo pipefail
    store_path=$1
    cache_url=$(sed -n "s/^extra-substituters = //p" /run/spindle/nix.conf)
    cache_url=${cache_url%% *}
    if [ -z "$cache_url" ]; then
      echo "error: cache URL not found in /run/spindle/nix.conf" >&2
      exit 1
    fi

    http_version=$(/run/current-system/sw/bin/curl --http2-prior-knowledge -fsS -o /dev/null -w "%{http_version}" "$cache_url/nix-cache-info")
    echo "http_version=$http_version"
    case "$http_version" in
      2|2.0) ;;
      *)
        echo "error: cache proxy did not negotiate HTTP/2 (got $http_version)" >&2
        exit 1
        ;;
    esac

    /run/current-system/sw/bin/nix-store --realise "$store_path" >/dev/null
  ' bash "$test_store_path") || return 1

  if ! echo "$out" | strip_ansi | grep -q -E "^http_version=2(\\.0)?$"; then
    echo "error: cache proxy did not report HTTP/2" >&2
    echo "$out" | strip_ansi >&2
    return 1
  fi
  echo "success: store path realized from cache and cache proxy accepted cleartext HTTP/2"
}

test_build_upload() {
  local nix_expr='with import <nixpkgs> {}; writeText "uploaded-test-file" "hello from vm upload"'
  local out
  out=$(run_vm --name "build-upload" --timeout "120s" --upload -- /run/current-system/sw/bin/bash -l -c "nix-build -E '$nix_expr' --no-out-link") || return 1

  local built_path
  built_path=$(echo "$out" | strip_ansi | grep -v '\.drv' | grep -o '/nix/store/[a-z0-9]*-uploaded-test-file' | head -n 1 || true)
  if [ -z "$built_path" ]; then
    echo "error: could not find built store path in vm output" >&2
    return 1
  fi
  echo "extracted path: $built_path"

  # same retrying narinfo probe the alpine test uses, rather than a single shot.
  if ! cache_has_path "$built_path"; then
    echo "error: built store path was not uploaded to the binary cache" >&2
    return 1
  fi
  echo "success: store path uploaded to cache"
}

test_networking() {
  # guarded: with an empty path the guest realise would have nothing to do and
  # the test could pass without substituting anything.
  local hello_path
  hello_path=$(nix-build -E 'with import <nixpkgs> {}; hello' --no-out-link) || return 1

  local out
  out=$(run_vm --name "networking" --timeout "120s" --no-cache -- /run/current-system/sw/bin/bash -c "/run/current-system/sw/bin/curl -I --connect-timeout 1 -m 1 http://10.0.2.2:$CACHE_PORT; /run/current-system/sw/bin/nix-store --realise $hello_path") || return 1

  if echo "$out" | grep -qi -E "unreachable|timeout|failed to connect|timed out" || echo "$out" | grep -q "exited with code"; then
    echo "success: host network access blocked"
  else
    echo "error: guest vm accessed host network or returned unexpected output" >&2
    echo "$out" | strip_ansi >&2
    return 1
  fi
  echo "success: guest vm reached the internet and substituted hello"
}

test_substitution_and_no_upload() {
  local hello_path
  hello_path=$(nix-build -E 'with import <nixpkgs> {}; hello' --no-out-link) || return 1

  local out
  out=$(run_vm --name "nixpkgs-hello" --timeout "180s" --upload -- /run/current-system/sw/bin/nix-store --realise "$hello_path") || return 1

  # Check that it was substituted from our proxy
  if ! echo "$out" | strip_ansi | grep -q -E "copying path.*hello"; then
    echo "error: hello package was not substituted (or output mismatch)" >&2
    echo "$out" | strip_ansi >&2
    return 1
  fi

  # Check that nothing was uploaded to the cache
  if ! echo "$out" | strip_ansi | grep -q "cache uploaded: 0"; then
    echo "error: hello package substitution triggered cache upload" >&2
    echo "$out" | strip_ansi >&2
    return 1
  fi

  echo "success: hello package substituted from upstream cache and was not uploaded"
}

# a pinned registry, reused by the dependency and registry-pin tests.
ACTIVATION_REGISTRY='"registry": {
  "nixpkgs": "github:nixos/nixpkgs/nixos-unstable",
  "my-nixpkgs": "nixpkgs"
}'

test_activation_services() {
  local config='{
    "services": {
      "openssh": {
        "enable": true,
        "authorizedKeysFiles": ["/etc/ssh/authorized_keys"]
      }
    }
  }'
  local out
  out=$(run_vm --name "activation-services" --timeout "300s" --activate "$config" -- /run/current-system/sw/bin/systemctl is-active sshd) || return 1
  if ! echo "$out" | strip_ansi | grep -q "^active$"; then
    echo "error: sshd not active after activation" >&2
    echo "$out" | strip_ansi >&2
    return 1
  fi
  echo "success: openssh service active after activation"
}

test_activation_dependencies() {
  # cowsay as a bare dependency (resolved via the pinned nixpkgs registry);
  # hello via the github flakeref and (separately) the my-nixpkgs alias.
  local config='{
    '"$ACTIVATION_REGISTRY"',
    "dependencies": [
      "cowsay",
      "github:nixos/nixpkgs#hello",
      "my-nixpkgs#hello"
    ]
  }'
  local out
  out=$(run_vm --name "activation-dependencies" --timeout "600s" --activate "$config" -- /run/current-system/sw/bin/bash -l -c '
    set -euo pipefail
    cowsay "registry pin ok" >/dev/null && echo "cowsay=ran"
    echo "hello=$(hello)"
  ') || return 1

  local clean
  clean=$(echo "$out" | strip_ansi)
  if ! echo "$clean" | grep -qF "cowsay=ran"; then
    echo "error: bare dependency 'cowsay' (resolved via the pinned nixpkgs registry) did not run" >&2
    echo "$clean" >&2
    return 1
  fi
  if ! echo "$clean" | grep -qF "hello=Hello, world!"; then
    echo "error: hello dependency (github flakeref + my-nixpkgs alias) did not run" >&2
    echo "$clean" >&2
    return 1
  fi
  echo "success: bare, flakeref, and aliased dependencies all resolved and ran"
}

test_activation_registry_pin() {
  # the nixpkgs the image itself was built from; the registry override must NOT
  # resolve to this. deterministic (locked in the repo flake), so safe to compare.
  # guarded: an empty value would make the comparison below vacuously pass.
  local base_nixpkgs
  base_nixpkgs=$(nix eval --raw --impure --expr '(builtins.getFlake (toString ./.)).inputs.nixpkgs.outPath') || return 1

  local config='{
    '"$ACTIVATION_REGISTRY"'
  }'
  # the pinned nixpkgs must reach the system nix config: resolving it via the
  # flakes CLI must not error "is not locked", and it must win the <nixpkgs> nixPath.
  local out
  out=$(run_vm --name "activation-registry-pin" --timeout "300s" --activate "$config" -- /run/current-system/sw/bin/bash -l -c '
    set -euo pipefail
    echo "lib_version=$(nix eval --raw nixpkgs#lib.version)"
    echo "nix_path=$(nix eval --raw --impure --expr "toString <nixpkgs>")"
  ') || return 1

  local clean
  clean=$(echo "$out" | strip_ansi)
  if ! echo "$clean" | grep -qE "lib_version=[0-9]"; then
    echo "error: guest could not resolve nixpkgs#lib.version from the user registry (locked-ref failure?)" >&2
    echo "$clean" >&2
    return 1
  fi
  local guest_nixpath
  guest_nixpath=$(echo "$clean" | sed -n 's/^nix_path=//p' | head -n1)
  if [ -z "$guest_nixpath" ] || [ "$guest_nixpath" = "$base_nixpkgs" ]; then
    echo "error: guest <nixpkgs> nixPath did not resolve to the registry override (got '$guest_nixpath', base '$base_nixpkgs')" >&2
    return 1
  fi
  echo "success: pinned nixpkgs registry reached the guest nix config (flakes CLI + <nixpkgs> nixPath)"
}

test_activation_cache_substitution() {
  # a unique path that only exists in the configured (workflow) cache; the host
  # seeds it so the guest can prove it substitutes through the read proxy.
  local test_store_path
  test_store_path=$(nix-build -E 'with import <nixpkgs> {}; writeText "activation-cache-test" "hello from the workflow cache"' --no-out-link) || return 1
  nix copy --to "$CACHE_UPLOAD_URL?secret-key=$CACHE_SECRET_KEY_PATH" "$test_store_path" || return 1

  # a trivial config: this test only cares that the read proxy serves the
  # workflow-cache path during an activated run.
  local out
  out=$(run_vm --name "activation-cache-substitution" --timeout "300s" --activate '{}' -- /run/current-system/sw/bin/bash -l -c '
    set -euo pipefail
    store_path=$1
    # the unique path only exists in the configured cache, so realising it proves it
    # was substituted through the read proxy and not built or found elsewhere.
    nix-store --realise "$store_path" >/dev/null
    echo "substituted=$(cat "$store_path")"
  ' bash "$test_store_path") || return 1

  if ! echo "$out" | strip_ansi | grep -qF "substituted=hello from the workflow cache"; then
    echo "error: unique path was not substituted from the configured cache" >&2
    echo "$out" | strip_ansi >&2
    return 1
  fi
  echo "success: unique path substituted from the workflow cache through the read proxy"
}

test_activation_docker() {
  local config='{
    "virtualisation": {
      "docker": { "enable": true }
    }
  }'
  # docker.service is up, but the daemon socket can lag a beat behind activation;
  # wait for it to answer, then pull+run a real image. this drives the slimmed
  # kernel modules: overlay.ko storage plus bridge/iptables networking out of the
  # pruned tree, with outbound DNS/network over the guest slirp link.
  local out
  out=$(run_vm --name "activation-docker" --timeout "600s" --activate "$config" -- /run/current-system/sw/bin/bash -l -c '
    set -euo pipefail
    echo "docker_unit=$(systemctl is-active docker)"
    for i in $(seq 1 60); do docker info >/dev/null 2>&1 && break; sleep 1; done
    docker info >/dev/null
    echo "storage_driver=$(docker info --format "{{.Driver}}")"
    docker run --rm alpine cat /etc/alpine-release | sed "s/^/alpine_release=/"
    docker run --rm alpine echo container-ran-ok
  ') || return 1

  local clean
  clean=$(echo "$out" | strip_ansi)
  if ! echo "$clean" | grep -q "^docker_unit=active$"; then
    echo "error: docker service not active after activation" >&2
    echo "$clean" >&2
    return 1
  fi
  if ! echo "$clean" | grep -qE "storage_driver=overlay(2|fs)"; then
    echo "error: docker is not using an overlay storage driver (overlay.ko missing?)" >&2
    return 1
  fi
  if ! echo "$clean" | grep -qE "alpine_release=[0-9]+\."; then
    echo "error: failed to pull and read the alpine image" >&2
    return 1
  fi
  if ! echo "$clean" | grep -q "container-ran-ok"; then
    echo "error: command did not run inside the alpine container" >&2
    return 1
  fi
  echo "success: docker service active, pulled and ran an alpine container on the overlay storage driver"
}

test_activation_cached_realize() {
  local config='{
    "services": {
      "openssh": {
        "enable": true,
        "authorizedKeysFiles": ["/etc/ssh/authorized_keys"]
      }
    }
  }'
  local db_path="$TEMP_DIR/activation-cached.db"

  # first run: build the config, upload its closure, and record the toplevel in
  # the db. nothing cached yet, so this builds from scratch.
  local out
  out=$(run_vm --name "activation-cached-first" --timeout "600s" --activate "$config" --db "$db_path" --upload -- /run/current-system/sw/bin/systemctl is-active sshd) || return 1
  if ! echo "$out" | strip_ansi | grep -q "^active$"; then
    echo "error: sshd not active after first activation" >&2
    echo "$out" | strip_ansi >&2
    return 1
  fi

  # second run: same config + db, no upload. must realize the recorded toplevel
  # from the cache instead of rebuilding, and the cached system must come up.
  out=$(run_vm --name "activation-cached-second" --timeout "300s" --activate "$config" --db "$db_path" -- /run/current-system/sw/bin/systemctl is-active sshd) || return 1

  local clean
  clean=$(echo "$out" | strip_ansi)
  if ! echo "$clean" | grep -q "realizing cached NixOS config"; then
    echo "error: second run did not realize cached configuration" >&2
    echo "$clean" >&2
    return 1
  fi
  if ! echo "$clean" | grep -q "^active$"; then
    echo "error: sshd not active after cached config activation" >&2
    echo "$clean" >&2
    return 1
  fi
  echo "success: second run realized the cached NixOS config from the cache and sshd came up"
}

test_alpine() {
  local hello_path
  hello_path=$(nix-build -E 'with import <nixpkgs> {}; hello' --no-out-link) || return 1

  local out
  out=$(run_vm --spec "$ALPINE_IMAGE_SPEC_JSON" --name "alpine" --timeout "180s" --no-cache -- /bin/sh -lc '
    set -eu
    export HOME=/workspace
    hello_path=$1
    echo "release=$(cat /etc/alpine-release)"
    echo "user=$(id -un)"
    git version
    bash -c "echo bash=\$BASH_VERSION"
    touch /workspace/write-test
    echo "workspace writable"
    git ls-remote https://tangled.org/@tangled.org/core HEAD >/dev/null
    echo "git over https ok"
    apk add make
    echo "apk ok"
    # substitute a real package from cache.nixos.org over HTTPS and run it
    nix-store --realise "$hello_path" >/dev/null
    echo "ran=$("$hello_path/bin/hello")"
  ' sh "$hello_path") || return 1

  echo "$out" | strip_ansi >&2
  local needle
  for needle in "release=" "user=spindle-workflow" "git version" "bash=" "workspace writable" "git over https ok" "apk ok" "ran=Hello, world!"; do
    if ! echo "$out" | strip_ansi | grep -q "$needle"; then
      echo "error: alpine guest output missing $needle" >&2
      return 1
    fi
  done
  echo "success: alpine guest booted, ran as workflow user, wrote workspace, cloned + installed over the network, and substituted+ran a package from cache.nixos.org over HTTPS"
}

# asserts a store path's narinfo shows up in the local cache, retrying briefly
# since the post-build-hook enqueues uploads asynchronously.
cache_has_path() {
  local path="$1"
  local hash
  hash=$(basename "$path" | cut -d'-' -f1)
  local i
  for i in $(seq 1 20); do
    if curl -s -f "$CACHE_URL/${hash}.narinfo" > /dev/null; then
      return 0
    fi
    sleep 0.5
  done
  return 1
}

test_alpine_nix() {
  local test_store_path
  test_store_path=$(nix-build -E 'with import <nixpkgs> {}; writeText "alpine-nix-test" "hello from cache to alpine"' --no-out-link) || return 1
  nix copy --to "$CACHE_UPLOAD_URL?secret-key=$CACHE_SECRET_KEY_PATH" "$test_store_path" || return 1

  # exercise the full local-cache path: daemon connectivity, substitution,
  # store-db queries, and a build via *both* the classic (nix-build) and the
  # new flakes/nix-command (nix build) frontends. the two build derivations
  # use distinct names so we can confirm each got uploaded back to the cache.
  # the heredoc bodies and their NIXEOF terminators sit at column 0 because the
  # guest shell sees the quoted script verbatim.
  local out
  out=$(run_vm --spec "$ALPINE_IMAGE_SPEC_JSON" --name "alpine-nix" --timeout "180s" --upload -- /bin/sh -lc '
    set -eu
    export HOME=/workspace
    store_path=$1

    echo "nix_version=$(nix --version | head -n1)"
    { nix store info >/dev/null 2>&1 || nix store ping >/dev/null 2>&1; } && echo "daemon=ok"

    # substitute a path from the cache and query the store db about it
    nix-store --realise "$store_path" >/dev/null
    echo "substituted=$(cat "$store_path")"
    echo "requisites=$(nix-store --query --requisites "$store_path" | wc -l | tr -d " ")"
    nix path-info --json "$store_path" >/dev/null && echo "path_info=ok"

    # build via the new CLI; the substituted path is declared as a real input
    # (builtins.storePath) so nix must realise it into the build sandbox first.
    # heredoc is unquoted (the outer guest script is single-quoted, so a quoted
    # delimiter would close it), hence \$ escapes what nix/the builder must expand.
    export DEP="$store_path"
    cat > /workspace/new.nix <<NIXEOF
let dep = builtins.storePath (builtins.getEnv "DEP"); in
derivation {
  name = "alpine-nix-build-new";
  system = "x86_64-linux";
  builder = "/bin/sh";
  # the sandbox only provides the sh builtin shell (no coreutils in PATH), so
  # stick to builtins: the build only succeeds if nix realised the declared
  # storePath dependency into the sandbox, where [ -r ] can see it.
  args = [ "-c" "[ -r \${dep} ] && echo via-nix-build-with-dep > \$out" ];
}
NIXEOF
    new_path=$(nix build --impure --file /workspace/new.nix --no-link --print-out-paths)
    echo "new_path=$new_path"
    echo "new_content=$(tr "\n" "|" < "$new_path")"

    # build via the classic CLI
    cat > /workspace/old.nix <<NIXEOF
derivation {
  name = "alpine-nix-build-old";
  system = "x86_64-linux";
  builder = "/bin/sh";
  args = [ "-c" "echo built-on-alpine > \$out" ];
}
NIXEOF
    old_path=$(nix-build /workspace/old.nix --no-out-link)
    echo "old_path=$old_path"
  ' sh "$test_store_path") || return 1

  echo "$out" | strip_ansi >&2
  local clean
  clean=$(echo "$out" | strip_ansi)

  local needle
  for needle in "daemon=ok" "substituted=hello from cache to alpine" "path_info=ok"; do
    if ! echo "$clean" | grep -q "$needle"; then
      echo "error: alpine nix output missing '$needle'" >&2
      return 1
    fi
  done
  if ! echo "$clean" | grep -qE "requisites=[1-9][0-9]*"; then
    echo "error: store db query returned no requisites for the substituted path" >&2
    return 1
  fi
  if ! echo "$clean" | grep -q "new_content=via-nix-build-with-dep"; then
    echo "error: 'nix build' (new CLI) did not realise its substituted dependency and build" >&2
    return 1
  fi

  local new_path old_path
  new_path=$(echo "$clean" | grep -o 'new_path=/nix/store/[a-z0-9]*-alpine-nix-build-new' | cut -d= -f2)
  old_path=$(echo "$clean" | grep -o 'old_path=/nix/store/[a-z0-9]*-alpine-nix-build-old' | cut -d= -f2)
  if [ -z "$new_path" ] || [ -z "$old_path" ]; then
    echo "error: could not extract both built store paths from alpine guest output" >&2
    return 1
  fi
  if ! cache_has_path "$new_path"; then
    echo "error: nix build (new CLI) output was not uploaded to the cache" >&2
    return 1
  fi
  if ! cache_has_path "$old_path"; then
    echo "error: nix-build (classic CLI) output was not uploaded to the cache" >&2
    return 1
  fi
  echo "success: alpine guest substituted, queried the store db, built via both CLIs, and uploaded both outputs"
}

TESTS=(
  test_alpine
  test_alpine_nix
  test_realize
  test_build_upload
  test_networking
  test_substitution_and_no_upload
  test_activation_services
  test_activation_dependencies
  test_activation_registry_pin
  test_activation_cache_substitution
  test_activation_docker
  test_activation_cached_realize
)

log "running ${#TESTS[@]} tests"
if ! run_tests; then
  exit 1
fi

SUCCESS=1
log "passed!!"
+192
spindle/engines/microvm/upload_cache_proxy.go
··· 1 + package microvm 2 + 3 + import ( 4 + "context" 5 + "errors" 6 + "fmt" 7 + "io" 8 + "log/slog" 9 + "net" 10 + "net/http" 11 + "net/http/httputil" 12 + "net/url" 13 + "strings" 14 + "time" 15 + 16 + "github.com/mdlayher/vsock" 17 + ) 18 + 19 + type UploadCacheProxy struct { 20 + port uint32 21 + 22 + ln *vsock.Listener 23 + server *http.Server 24 + } 25 + 26 + func StartUploadCacheProxy(ctx context.Context, cid uint32, uploadURL string, readUpstreams []CacheUpstream, logger *slog.Logger) (*UploadCacheProxy, error) { 27 + if strings.TrimSpace(uploadURL) == "" { 28 + return nil, nil 29 + } 30 + 31 + if logger == nil { 32 + logger = slog.Default() 33 + } 34 + logger = logger.With("where", "upload_cache_proxy", "cid", cid, "uploadURL", uploadURL) 35 + 36 + target, err := url.Parse(uploadURL) 37 + if err != nil { 38 + return nil, fmt.Errorf("parse upload URL %q: %w", uploadURL, err) 39 + } 40 + if target.Scheme != "http" && target.Scheme != "https" { 41 + return nil, fmt.Errorf("upload URL %q uses unsupported scheme %q (must be http or https)", uploadURL, target.Scheme) 42 + } 43 + if target.Host == "" { 44 + return nil, fmt.Errorf("upload URL %q is missing host", uploadURL) 45 + } 46 + 47 + ln, port, err := listenRandomVsockUploadPort(ctx) 48 + if err != nil { 49 + return nil, fmt.Errorf("listen for cache upload proxy: %w", err) 50 + } 51 + 52 + proxy := &UploadCacheProxy{ 53 + port: port, 54 + ln: ln, 55 + } 56 + proxy.server = &http.Server{ 57 + Handler: uploadProxyHandler(target, readUpstreams, logger), 58 + Protocols: cacheProxyProtocols(), 59 + ReadHeaderTimeout: 30 * time.Second, 60 + } 61 + 62 + filtered := &cidFilteredVsockListener{ 63 + Listener: ln, 64 + cid: cid, 65 + logger: logger, 66 + } 67 + go func() { 68 + if err := proxy.server.Serve(filtered); err != nil && !errors.Is(err, http.ErrServerClosed) && !errors.Is(err, net.ErrClosed) { 69 + logger.Warn("upload cache proxy stopped", "port", port, "error", err) 70 + } 71 + }() 72 + 73 + 
logger.Info("started upload cache proxy", "port", port, "target", uploadURL, "readUpstreams", len(readUpstreams)) 74 + return proxy, nil 75 + } 76 + 77 + func (p *UploadCacheProxy) Port() uint32 { 78 + if p == nil { 79 + return 0 80 + } 81 + return p.port 82 + } 83 + 84 + func (p *UploadCacheProxy) Close() error { 85 + if p == nil { 86 + return nil 87 + } 88 + 89 + var closeErr error 90 + if p.server != nil { 91 + ctx, cancel := context.WithTimeout(context.Background(), time.Second) 92 + closeErr = errors.Join(closeErr, p.server.Shutdown(ctx)) 93 + cancel() 94 + p.server = nil 95 + } 96 + if p.ln != nil { 97 + closeErr = errors.Join(closeErr, p.ln.Close()) 98 + p.ln = nil 99 + } 100 + return closeErr 101 + } 102 + 103 + func uploadProxyHandler(target *url.URL, readUpstreams []CacheUpstream, logger *slog.Logger) http.Handler { 104 + rp := httputil.NewSingleHostReverseProxy(target) 105 + rp.ErrorLog = slog.NewLogLogger(logger.Handler(), slog.LevelError) 106 + 107 + origDirector := rp.Director 108 + rp.Director = func(req *http.Request) { 109 + origDirector(req) 110 + // ensure host matches target 111 + req.Host = target.Host 112 + // the transport doesn't turn URL userinfo into basic auth, only 113 + // http.Client does, so do it ourselves 114 + if user := target.User; user != nil { 115 + password, _ := user.Password() 116 + req.SetBasicAuth(user.Username(), password) 117 + } 118 + } 119 + 120 + // before uploading, nix copy asks the destination whether it already has each 121 + // path by GET/HEAD-ing <hash>.narinfo and skips the ones it does. we answer 122 + // that check across the upload target *and* the read caches: if any of them 123 + // already serves the path there is no point uploading it (the guest would 124 + // just substitute it from there anyway). 125 + narinfoUpstreams := append([]CacheUpstream{{url: target}}, readUpstreams...) 
126 + exists := &parallelRacingTransport{ 127 + upstreams: narinfoUpstreams, 128 + underlying: proxyTransport, 129 + guardedUnderlying: guardedProxyTransport, 130 + logger: logger, 131 + } 132 + 133 + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 134 + if isNarinfoExistenceCheck(r) { 135 + serveNarinfoExistence(w, r, exists, logger) 136 + return 137 + } 138 + rp.ServeHTTP(w, r) 139 + }) 140 + } 141 + 142 + func isNarinfoExistenceCheck(r *http.Request) bool { 143 + if r.Method != http.MethodGet && r.Method != http.MethodHead { 144 + return false 145 + } 146 + return strings.HasSuffix(r.URL.Path, ".narinfo") 147 + } 148 + 149 + func serveNarinfoExistence(w http.ResponseWriter, r *http.Request, exists http.RoundTripper, logger *slog.Logger) { 150 + probe := r.Clone(r.Context()) 151 + probe.RequestURI = "" 152 + 153 + resp, err := exists.RoundTrip(probe) 154 + if err != nil { 155 + logger.Warn("upload proxy narinfo check failed, treating as not present", "path", r.URL.Path, "error", err) 156 + w.WriteHeader(http.StatusNotFound) 157 + return 158 + } 159 + defer resp.Body.Close() 160 + 161 + for key, values := range resp.Header { 162 + for _, value := range values { 163 + w.Header().Add(key, value) 164 + } 165 + } 166 + w.WriteHeader(resp.StatusCode) 167 + if _, err := io.Copy(w, resp.Body); err != nil && !errors.Is(err, context.Canceled) { 168 + logger.Warn("upload proxy narinfo copy failed", "path", r.URL.Path, "error", err) 169 + } 170 + } 171 + 172 + func listenRandomVsockUploadPort(ctx context.Context) (*vsock.Listener, uint32, error) { 173 + var lastErr error 174 + for range 32 { 175 + port, err := randomVsockPort() 176 + if err != nil { 177 + return nil, 0, err 178 + } 179 + ln, err := vsock.Listen(port, nil) 180 + if err == nil { 181 + return ln, port, nil 182 + } 183 + lastErr = err 184 + 185 + select { 186 + case <-ctx.Done(): 187 + return nil, 0, ctx.Err() 188 + default: 189 + } 190 + } 191 + return nil, 0, fmt.Errorf("listen on random 
vsock upload port: %w", lastErr) 192 + }
+128
spindle/engines/microvm/upload_cache_proxy_test.go
··· 1 + package microvm 2 + 3 + import ( 4 + "io" 5 + "log/slog" 6 + "net/http" 7 + "net/http/httptest" 8 + "net/url" 9 + "strings" 10 + "testing" 11 + ) 12 + 13 + func TestUploadProxyRewritesHostAndAuth(t *testing.T) { 14 + var upstreamHost string 15 + upstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { 16 + if req.Host != upstreamHost { 17 + t.Errorf("host: got %q, want %q", req.Host, upstreamHost) 18 + } 19 + if req.URL.Path != "/sub/abc.narinfo" { 20 + t.Errorf("path: got %q, want /sub/abc.narinfo", req.URL.Path) 21 + } 22 + if user, pass, ok := req.BasicAuth(); !ok || user != "dawn" || pass != "woof" { 23 + t.Errorf("basic auth: got %q/%q/%v, want dawn/hunter2/true", user, pass, ok) 24 + } 25 + _, _ = io.WriteString(w, "ok") 26 + })) 27 + defer upstream.Close() 28 + upstreamHost = strings.TrimPrefix(upstream.URL, "http://") 29 + 30 + target, err := url.Parse("http://dawn:woof@" + upstreamHost + "/sub/") 31 + if err != nil { 32 + t.Fatal(err) 33 + } 34 + 35 + req := httptest.NewRequest(http.MethodPut, "http://127.0.0.1:10501/abc.narinfo", strings.NewReader("narinfo")) 36 + req.Host = "127.0.0.1:10501" 37 + rec := httptest.NewRecorder() 38 + uploadProxyHandler(target, nil, slog.Default()).ServeHTTP(rec, req) 39 + 40 + if rec.Code != http.StatusOK { 41 + t.Fatalf("status: got %d, want 200; body=%q", rec.Code, rec.Body.String()) 42 + } 43 + } 44 + 45 + func mustParseURL(t *testing.T, raw string) *url.URL { 46 + t.Helper() 47 + u, err := url.Parse(raw) 48 + if err != nil { 49 + t.Fatalf("parse %q: %v", raw, err) 50 + } 51 + return u 52 + } 53 + 54 + func TestUploadProxySkipsNarinfoAvailableUpstream(t *testing.T) { 55 + var uploadHits int 56 + target := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { 57 + uploadHits++ 58 + w.WriteHeader(http.StatusNotFound) 59 + })) 60 + defer target.Close() 61 + 62 + upstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req 
*http.Request) { 63 + if req.URL.Path != "/abc.narinfo" { 64 + t.Errorf("upstream path: got %q, want /abc.narinfo", req.URL.Path) 65 + } 66 + _, _ = io.WriteString(w, "StorePath: /nix/store/abc\n") 67 + })) 68 + defer upstream.Close() 69 + 70 + handler := uploadProxyHandler( 71 + mustParseURL(t, target.URL), 72 + []CacheUpstream{{url: mustParseURL(t, upstream.URL)}}, 73 + slog.Default(), 74 + ) 75 + 76 + req := httptest.NewRequest(http.MethodGet, "http://127.0.0.1:10501/abc.narinfo", nil) 77 + rec := httptest.NewRecorder() 78 + handler.ServeHTTP(rec, req) 79 + 80 + if rec.Code != http.StatusOK { 81 + t.Fatalf("status: got %d, want 200 (so nix treats the path as present and skips upload)", rec.Code) 82 + } 83 + if !strings.Contains(rec.Body.String(), "StorePath: /nix/store/abc") { 84 + t.Fatalf("body: got %q, want the upstream narinfo body", rec.Body.String()) 85 + } 86 + } 87 + 88 + func TestUploadProxyUploadsNarinfoNobodyHas(t *testing.T) { 89 + target := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { 90 + w.WriteHeader(http.StatusNotFound) 91 + })) 92 + defer target.Close() 93 + upstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { 94 + w.WriteHeader(http.StatusNotFound) 95 + })) 96 + defer upstream.Close() 97 + 98 + handler := uploadProxyHandler( 99 + mustParseURL(t, target.URL), 100 + []CacheUpstream{{url: mustParseURL(t, upstream.URL)}}, 101 + slog.Default(), 102 + ) 103 + 104 + req := httptest.NewRequest(http.MethodGet, "http://127.0.0.1:10501/abc.narinfo", nil) 105 + rec := httptest.NewRecorder() 106 + handler.ServeHTTP(rec, req) 107 + 108 + if rec.Code != http.StatusNotFound { 109 + t.Fatalf("status: got %d, want 404 (so nix uploads the path)", rec.Code) 110 + } 111 + } 112 + 113 + func TestUploadProxySkipsNarinfoAlreadyOnTarget(t *testing.T) { 114 + target := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { 115 + _, _ = io.WriteString(w, 
"StorePath: /nix/store/abc\n") 116 + })) 117 + defer target.Close() 118 + 119 + handler := uploadProxyHandler(mustParseURL(t, target.URL), nil, slog.Default()) 120 + 121 + req := httptest.NewRequest(http.MethodGet, "http://127.0.0.1:10501/abc.narinfo", nil) 122 + rec := httptest.NewRecorder() 123 + handler.ServeHTTP(rec, req) 124 + 125 + if rec.Code != http.StatusOK { 126 + t.Fatalf("status: got %d, want 200", rec.Code) 127 + } 128 + }
+370
spindle/engines/microvm/vm.go
··· 1 + package microvm 2 + 3 + import ( 4 + "context" 5 + "crypto/rand" 6 + "encoding/binary" 7 + "errors" 8 + "fmt" 9 + "io" 10 + "log/slog" 11 + "maps" 12 + "math" 13 + "net" 14 + "os" 15 + "os/exec" 16 + "path/filepath" 17 + "slices" 18 + "strings" 19 + "sync/atomic" 20 + "time" 21 + 22 + "tangled.org/core/spindle/models" 23 + ) 24 + 25 + const ( 26 + minGuestCID = 3 27 + vmCrashLogTailBytes = 4096 28 + ) 29 + 30 + func AllocateCID() (uint32, error) { 31 + var data [4]byte 32 + if _, err := rand.Read(data[:]); err != nil { 33 + return 0, fmt.Errorf("allocate guest CID: %w", err) 34 + } 35 + return minGuestCID + binary.BigEndian.Uint32(data[:])%60000, nil 36 + } 37 + 38 + func prepareWorkDir(workDir string) error { 39 + if workDir == "" { 40 + return fmt.Errorf("microvm work directory is required") 41 + } 42 + if err := os.MkdirAll(workDir, 0o755); err != nil { 43 + return fmt.Errorf("create microvm work directory: %w", err) 44 + } 45 + return nil 46 + } 47 + 48 + func prepareVolumes(ctx context.Context, workDir string, volumes []Volume, mkfsExt4 string) (map[string]string, error) { 49 + paths := make(map[string]string, len(volumes)) 50 + for _, volume := range volumes { 51 + if volume.ReadOnly { 52 + return nil, fmt.Errorf("read-only microvm volume %q is not supported yet", volume.Image) 53 + } 54 + if volume.FSType != "ext4" { 55 + return nil, fmt.Errorf("microvm volume %q uses unsupported fsType %q", volume.Image, volume.FSType) 56 + } 57 + if volume.ImageType != "" && volume.ImageType != "raw" { 58 + return nil, fmt.Errorf("microvm volume %q uses unsupported imageType %q", volume.Image, volume.ImageType) 59 + } 60 + 61 + path := filepath.Join(workDir, filepath.Base(volume.Image)) 62 + if err := createSparseFile(path, volume.SizeMiB); err != nil { 63 + return nil, err 64 + } 65 + noJournal := volume.MountPoint == "/workspace" 66 + if err := runMkfsExt4(ctx, mkfsExt4, path, noJournal); err != nil { 67 + return nil, err 68 + } 69 + paths[volume.Image] = path 70 
+ } 71 + return paths, nil 72 + } 73 + 74 + func createSparseFile(path string, sizeMiB int64) error { 75 + if sizeMiB <= 0 { 76 + return fmt.Errorf("sparse file %q size must be positive", path) 77 + } 78 + if sizeMiB > math.MaxInt64/(1024*1024) { 79 + return fmt.Errorf("sparse file %q size is too large", path) 80 + } 81 + file, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0o600) 82 + if err != nil { 83 + return fmt.Errorf("create sparse file %q: %w", path, err) 84 + } 85 + defer file.Close() 86 + 87 + if err := file.Truncate(sizeMiB * 1024 * 1024); err != nil { 88 + return fmt.Errorf("resize sparse file %q: %w", path, err) 89 + } 90 + return nil 91 + } 92 + 93 + func runMkfsExt4(ctx context.Context, mkfsExt4, path string, noJournal bool) error { 94 + if mkfsExt4 == "" { 95 + return fmt.Errorf("mkfs.ext4 path is required") 96 + } 97 + args := []string{"-F"} 98 + if noJournal { 99 + args = append(args, "-O", "^has_journal") 100 + } 101 + args = append(args, path) 102 + 103 + cmd := exec.CommandContext(ctx, mkfsExt4, args...) 
104 + output, err := cmd.CombinedOutput() 105 + if err != nil { 106 + return fmt.Errorf("mkfs.ext4 %q: %w: %s", path, err, strings.TrimSpace(string(output))) 107 + } 108 + return nil 109 + } 110 + 111 + func createParentedFile(path string) (*os.File, error) { 112 + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { 113 + return nil, fmt.Errorf("create log directory: %w", err) 114 + } 115 + file, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o644) 116 + if err != nil { 117 + return nil, fmt.Errorf("create log file %q: %w", path, err) 118 + } 119 + return file, nil 120 + } 121 + 122 + type VMLogs struct { 123 + Serial string 124 + Extra map[string]string 125 + } 126 + 127 + type VMHandle interface { 128 + Shutdown(ctx context.Context) error 129 + WaitContext(ctx context.Context) error 130 + Close() error 131 + Logs() VMLogs 132 + CID() uint32 133 + WorkDir() string 134 + OOMKilled() bool 135 + } 136 + 137 + type VMConfig struct { 138 + Image ImageSpec 139 + CID uint32 140 + EnableKVM bool 141 + WorkDir string 142 + Cgroup CgroupLimits 143 + 144 + BootTimeout time.Duration 145 + MkfsExt4 string 146 + Dev bool 147 + } 148 + 149 + type workflowState struct { 150 + ImageSpec ImageSpec 151 + ImageSpecPath string 152 + Config manifestConfig 153 + ConfigKey string 154 + Image string 155 + CacheReadURLs []string 156 + CacheTrustedPublicKeys []string 157 + VM VMHandle 158 + Agent *AgentSession 159 + ReadCache *ReadCacheProxy 160 + UploadCache *UploadCacheProxy 161 + DNSProxy *DNSProxy 162 + WorkDir string 163 + NixOSToplevelCache nixosToplevelCacheStore 164 + } 165 + 166 + func (e *Engine) cleanupState(ctx context.Context, wid models.WorkflowId, state *workflowState) error { 167 + if state == nil { 168 + return nil 169 + } 170 + 171 + ctx = context.WithoutCancel(ctx) 172 + 173 + var err error 174 + err = errors.Join(err, e.drainNixCache(ctx, state)) 175 + err = errors.Join(err, e.shutdownVM(ctx, wid, state)) 176 + err = errors.Join(err, 
closeIO(&state.Agent)) 177 + err = errors.Join(err, closeIO(&state.ReadCache)) 178 + err = errors.Join(err, closeIO(&state.UploadCache)) 179 + err = errors.Join(err, closeIO(&state.DNSProxy)) 180 + err = errors.Join(err, removeWorkDir(state)) 181 + return err 182 + } 183 + 184 + func (e *Engine) drainNixCache(ctx context.Context, state *workflowState) error { 185 + if state.Agent == nil || e.cfg.NixCache.UploadURL == "" { 186 + return nil 187 + } 188 + 189 + drainCtx, cancel := context.WithTimeout(ctx, cacheDrainTimeout) 190 + defer cancel() 191 + 192 + if _, err := state.Agent.Drain(drainCtx); err != nil { 193 + return fmt.Errorf("drain nix cache: %w", err) 194 + } 195 + return nil 196 + } 197 + 198 + func (e *Engine) shutdownVM(ctx context.Context, wid models.WorkflowId, state *workflowState) error { 199 + if state.VM == nil { 200 + return nil 201 + } 202 + 203 + var err error 204 + 205 + if state.Agent != nil { 206 + gracefulCtx, cancel := context.WithTimeout(ctx, vmShutdownTimeout) 207 + poweredOff, poweroffErr := e.poweroffViaAgent(gracefulCtx, wid, state) 208 + cancel() 209 + 210 + err = errors.Join(err, poweroffErr) 211 + if poweredOff { 212 + return errors.Join(err, closeIO(&state.VM)) 213 + } 214 + } 215 + 216 + fallbackCtx, cancel := context.WithTimeout(ctx, vmShutdownTimeout) 217 + defer cancel() 218 + 219 + if shutdownErr := state.VM.Shutdown(fallbackCtx); shutdownErr != nil { 220 + e.l.Warn("microVM shutdown fallback failed", "workflow", wid, "error", shutdownErr) 221 + err = errors.Join(err, shutdownErr) 222 + } 223 + 224 + return errors.Join(err, closeIO(&state.VM)) 225 + } 226 + 227 + func (e *Engine) poweroffViaAgent(ctx context.Context, wid models.WorkflowId, state *workflowState) (bool, error) { 228 + if err := state.Agent.Poweroff(ctx); err != nil { 229 + e.l.Warn("agent poweroff request failed", "workflow", wid, "error", err) 230 + return false, err 231 + } 232 + 233 + if err := state.VM.WaitContext(ctx); err != nil { 234 + e.l.Warn("agent 
poweroff did not stop microVM", "workflow", wid, "error", err) 235 + return false, nil 236 + } 237 + 238 + return true, nil 239 + } 240 + 241 + // helper for closing io interfaces, sets to nil to prevent double-close 242 + func closeIO[T io.Closer](field *T) error { 243 + closer := *field 244 + var zero T 245 + *field = zero 246 + if any(closer) == any(zero) { 247 + return nil 248 + } 249 + return closer.Close() 250 + } 251 + 252 + func removeWorkDir(state *workflowState) error { 253 + if state.WorkDir == "" { 254 + return nil 255 + } 256 + 257 + err := os.RemoveAll(state.WorkDir) 258 + state.WorkDir = "" 259 + return err 260 + } 261 + 262 + // returns a context derived from ctx that is cancelled either when ctx itself 263 + // is cancelled or when the microVM exits on its own. the returned flag reports 264 + // whether the VM exited (as opposed to ctx being cancelled for another reason, 265 + // e.g. the workflow timeout), letting callers tell a crash apart from a 266 + // timeout. cancel must be called to release the watcher goroutine. 267 + func watchVMExit(ctx context.Context, vm VMHandle) (context.Context, *atomic.Bool, context.CancelFunc) { 268 + exited := &atomic.Bool{} 269 + watchCtx, cancel := context.WithCancel(ctx) 270 + if vm == nil { 271 + return watchCtx, exited, cancel 272 + } 273 + go func() { 274 + _ = vm.WaitContext(watchCtx) // returns when VM exits or watchCtx is cancelled 275 + if watchCtx.Err() == nil { 276 + exited.Store(true) 277 + cancel() // don't forget to cancel the watchCtx... 
278 + } 279 + }() 280 + return watchCtx, exited, cancel 281 + } 282 + 283 + func vmCrashLog(vm VMHandle) string { 284 + if vm == nil { 285 + return "" 286 + } 287 + logs := vm.Logs() 288 + 289 + var b strings.Builder 290 + if tail := tailFile(logs.Serial, vmCrashLogTailBytes); tail != "" { 291 + fmt.Fprintf(&b, "==== serial log ====\n%s\n", tail) 292 + } 293 + for _, name := range slices.Sorted(maps.Keys(logs.Extra)) { 294 + if tail := tailFile(logs.Extra[name], vmCrashLogTailBytes); tail != "" { 295 + fmt.Fprintf(&b, "==== %s log ====\n%s\n", name, tail) 296 + } 297 + } 298 + return strings.TrimRight(b.String(), "\n") 299 + } 300 + 301 + func tailFile(path string, max int64) string { 302 + if path == "" { 303 + return "" 304 + } 305 + f, err := os.Open(path) 306 + if err != nil { 307 + return "" 308 + } 309 + defer f.Close() 310 + if info, err := f.Stat(); err == nil && info.Size() > max { 311 + if _, err := f.Seek(-max, io.SeekEnd); err != nil { 312 + return "" 313 + } 314 + } 315 + data, err := io.ReadAll(f) 316 + if err != nil { 317 + return "" 318 + } 319 + return strings.TrimSpace(string(data)) 320 + } 321 + 322 + func waitAgentConn(ctx context.Context, connCh <-chan net.Conn) (net.Conn, error) { 323 + select { 324 + case conn := <-connCh: 325 + if conn == nil { 326 + return nil, fmt.Errorf("agent connection closed before setup") 327 + } 328 + return conn, nil 329 + case <-ctx.Done(): 330 + return nil, fmt.Errorf("waiting for agent: %w", ctx.Err()) 331 + } 332 + } 333 + 334 + func StartVM(ctx context.Context, cfg VMConfig, logger *slog.Logger) (VMHandle, error) { 335 + if logger == nil { 336 + logger = slog.Default() 337 + } 338 + 339 + runner, err := runnerFor(cfg.Image.RunnerType) 340 + if err != nil { 341 + return nil, err 342 + } 343 + if err := cfg.Image.Validate(); err != nil { 344 + return nil, err 345 + } 346 + if err := cfg.Image.validateImageFiles(); err != nil { 347 + return nil, err 348 + } 349 + if err := runner.Validate(cfg.Image, 
cfg.EnableKVM); err != nil { 350 + return nil, err 351 + } 352 + 353 + if err := prepareWorkDir(cfg.WorkDir); err != nil { 354 + return nil, err 355 + } 356 + 357 + mkfsExt4 := cfg.MkfsExt4 358 + if mkfsExt4 == "" { 359 + mkfsExt4, err = exec.LookPath("mkfs.ext4") 360 + if err != nil { 361 + return nil, fmt.Errorf("mkfs.ext4 command not found in PATH: %w", err) 362 + } 363 + } 364 + volumePaths, err := prepareVolumes(ctx, cfg.WorkDir, cfg.Image.Volumes, mkfsExt4) 365 + if err != nil { 366 + return nil, err 367 + } 368 + 369 + return runner.Start(ctx, cfg, volumePaths, logger) 370 + }
+24 -4
spindle/engines/nixery/engine.go
··· 39 39 l *slog.Logger 40 40 cfg *config.Config 41 41 42 + slotter engine.WorkflowSlotter 43 + 42 44 cleanupMu sync.Mutex 43 45 cleanup map[string][]cleanupFunc 44 46 } ··· 168 170 l := log.FromContext(ctx).With("component", "spindle") 169 171 170 172 e := &Engine{ 171 - docker: dcli, 172 - l: l, 173 - cfg: cfg, 173 + docker: dcli, 174 + l: l, 175 + cfg: cfg, 176 + slotter: engine.NewSemaphoreSlotter(cfg.NixeryPipelines.MaxConcurrentWorkflows), 174 177 } 175 178 176 179 e.cleanup = make(map[string][]cleanupFunc) 177 180 178 181 return e, nil 182 + } 183 + 184 + func (e *Engine) AcquireWorkflowSlot( 185 + ctx context.Context, 186 + wid models.WorkflowId, 187 + wf *models.Workflow, 188 + ) (engine.WorkflowSlot, error) { 189 + if e.slotter == nil { 190 + return engine.NoopSlot{}, nil 191 + } 192 + 193 + return e.slotter.AcquireWorkflowSlot(ctx, wid, wf) 179 194 } 180 195 181 196 func (e *Engine) SetupWorkflow(ctx context.Context, wid models.WorkflowId, wf *models.Workflow, wfLogger models.WorkflowLogger) error { ··· 235 250 l.Info("creating container") 236 251 wfLogger.DataWriter(setupStepIdx, "stdout").Write([]byte("Creating container...")) 237 252 253 + extraHosts := []string{"host.docker.internal:host-gateway"} 254 + for _, h := range e.cfg.Server.DevExtraHosts { 255 + extraHosts = append(extraHosts, h+":host-gateway") 256 + } 257 + 238 258 resp, err := e.docker.ContainerCreate(ctx, &container.Config{ 239 259 Image: addl.image, 240 260 Cmd: []string{"cat"}, ··· 265 285 CapDrop: []string{"ALL"}, 266 286 CapAdd: []string{"CAP_DAC_OVERRIDE", "CAP_CHOWN", "CAP_FOWNER", "CAP_SETUID", "CAP_SETGID"}, 267 287 SecurityOpt: []string{"no-new-privileges"}, 268 - ExtraHosts: []string{"host.docker.internal:host-gateway"}, 288 + ExtraHosts: extraHosts, 269 289 Resources: container.Resources{ 270 290 Memory: e.cfg.NixeryPipelines.MaxJobMemoryMB * 1024 * 1024, 271 291 },
+18 -13
spindle/models/clone.go
··· 5 5 "strings" 6 6 7 7 "tangled.org/core/api/tangled" 8 + "tangled.org/core/hostutil" 8 9 "tangled.org/core/workflow" 9 10 ) 10 11 ··· 55 56 } 56 57 } 57 58 58 - repoURL := BuildRepoURL(tr.Repo, dev) 59 + repoURL := BuildRepoURL(tr.Repo) 59 60 60 61 var cloneOpts tangled.Pipeline_CloneOpts 61 62 if twf.Clone != nil { ··· 63 64 } 64 65 fetchArgs := buildFetchArgs(cloneOpts, commitSHA) 65 66 67 + // In dev mode we point at Caddy via host-gateway with a self-signed cert, 68 + // so skip the TLS check for the fetch call. 69 + fetchCmd := "git fetch" 70 + if dev { 71 + fetchCmd = "git -c http.sslVerify=false fetch" 72 + } 73 + 66 74 return CloneStep{ 67 75 kind: StepKindSystem, 68 76 name: "Clone repository into workspace", 69 77 commands: []string{ 70 78 "git init", 71 79 fmt.Sprintf("git remote add origin %s", repoURL), 72 - fmt.Sprintf("git fetch %s", strings.Join(fetchArgs, " ")), 80 + fmt.Sprintf("%s %s", fetchCmd, strings.Join(fetchArgs, " ")), 73 81 "git checkout FETCH_HEAD", 74 82 }, 75 83 } ··· 102 110 } 103 111 104 112 // BuildRepoURL constructs the repository URL from repo metadata. 
105 - func BuildRepoURL(repo *tangled.Pipeline_TriggerRepo, devMode bool) string { 106 - scheme := "https://" 107 - if devMode { 108 - scheme = "http://" 113 + func BuildRepoURL(repo *tangled.Pipeline_TriggerRepo) string { 114 + if repo == nil { 115 + return "" 109 116 } 110 117 111 - // Get host from knot 112 - host := repo.Knot 113 - 114 - // In dev mode, replace localhost with host.docker.internal for Docker networking 115 - if devMode && strings.Contains(host, "localhost") { 116 - host = strings.ReplaceAll(host, "localhost", "host.docker.internal") 118 + host, noSSL, _ := hostutil.ParseHostname(repo.Knot) 119 + scheme := "https" 120 + if noSSL { 121 + scheme = "http" 117 122 } 118 123 119 - return fmt.Sprintf("%s%s/%s", scheme, host, *repo.RepoDid) 124 + return fmt.Sprintf("%s://%s/%s", scheme, host, *repo.RepoDid) 120 125 } 121 126 122 127 // buildFetchArgs constructs the arguments for git fetch based on clone options
+5 -5
spindle/models/clone_test.go
··· 166 166 167 167 func TestBuildCloneStep_DevMode(t *testing.T) { 168 168 twf := tangled.Pipeline_Workflow{ 169 + Engine: "nixery", 169 170 Clone: &tangled.Pipeline_CloneOpts{ 170 171 Depth: 1, 171 172 Skip: false, ··· 177 178 NewSha: "abc123", 178 179 }, 179 180 Repo: &tangled.Pipeline_TriggerRepo{ 180 - Knot: "localhost:3000", 181 + Knot: "knot.tngl.boltless.dev", 181 182 Did: "did:plc:user123", 182 183 Repo: sp("my-repo"), 183 184 RepoDid: sp("did:plc:boltless"), ··· 186 187 187 188 step := BuildCloneStep(twf, tr, true) 188 189 189 - // In dev mode, should use http:// and replace localhost with host.docker.internal 190 + // In dev mode, sslVerify should be disabled 190 191 allCmds := strings.Join(step.Commands(), " ") 191 - expectedURL := "http://host.docker.internal:3000/did:plc:boltless" 192 - if !strings.Contains(allCmds, expectedURL) { 193 - t.Errorf("Expected dev mode URL '%s' in commands", expectedURL) 192 + if !strings.Contains(allCmds, "git -c http.sslVerify=false fetch") { 193 + t.Error("Expected sslVerify to be disabled in dev mode clone commands") 194 194 } 195 195 } 196 196
+2 -2
spindle/models/pipeline_env.go
··· 10 10 11 11 // PipelineEnvVars extracts environment variables from pipeline trigger metadata. 12 12 // These are framework-provided variables that are injected into workflow steps. 13 - func PipelineEnvVars(tr *tangled.Pipeline_TriggerMetadata, pipelineId PipelineId, devMode bool) map[string]string { 13 + func PipelineEnvVars(tr *tangled.Pipeline_TriggerMetadata, pipelineId PipelineId) map[string]string { 14 14 if tr == nil { 15 15 return nil 16 16 } ··· 34 34 env["TANGLED_REPO_REPO_DID"] = *tr.Repo.RepoDid 35 35 } 36 36 env["TANGLED_REPO_DEFAULT_BRANCH"] = tr.Repo.DefaultBranch 37 - env["TANGLED_REPO_URL"] = BuildRepoURL(tr.Repo, devMode) 37 + env["TANGLED_REPO_URL"] = BuildRepoURL(tr.Repo) 38 38 } 39 39 40 40 switch workflow.TriggerKind(tr.Kind) {
+8 -9
spindle/models/pipeline_env_test.go
··· 27 27 Knot: "example.com", 28 28 Rkey: "123123", 29 29 } 30 - env := PipelineEnvVars(tr, id, false) 30 + env := PipelineEnvVars(tr, id) 31 31 32 32 // Check standard CI variable 33 33 if env["CI"] != "true" { ··· 90 90 Knot: "example.com", 91 91 Rkey: "123123", 92 92 } 93 - env := PipelineEnvVars(tr, id, false) 93 + env := PipelineEnvVars(tr, id) 94 94 95 95 if env["TANGLED_REF"] != "refs/tags/v1.2.3" { 96 96 t.Errorf("Expected TANGLED_REF='refs/tags/v1.2.3', got '%s'", env["TANGLED_REF"]) ··· 123 123 Knot: "example.com", 124 124 Rkey: "123123", 125 125 } 126 - env := PipelineEnvVars(tr, id, false) 126 + env := PipelineEnvVars(tr, id) 127 127 128 128 // Check ref variables for PR 129 129 if env["TANGLED_REF"] != "refs/heads/feature-branch" { ··· 179 179 Knot: "example.com", 180 180 Rkey: "123123", 181 181 } 182 - env := PipelineEnvVars(tr, id, false) 182 + env := PipelineEnvVars(tr, id) 183 183 184 184 // Check manual input variables 185 185 if env["TANGLED_INPUT_VERSION"] != "1.0.0" { ··· 216 216 Knot: "example.com", 217 217 Rkey: "123123", 218 218 } 219 - env := PipelineEnvVars(tr, id, true) 219 + env := PipelineEnvVars(tr, id) 220 220 221 - // Dev mode should use http:// and replace localhost with host.docker.internal 222 - expectedURL := "http://host.docker.internal:3000/did:plc:boltless" 221 + expectedURL := "http://localhost:3000/did:plc:boltless" 223 222 if env["TANGLED_REPO_URL"] != expectedURL { 224 223 t.Errorf("Expected TANGLED_REPO_URL='%s', got '%s'", expectedURL, env["TANGLED_REPO_URL"]) 225 224 } ··· 230 229 Knot: "example.com", 231 230 Rkey: "123123", 232 231 } 233 - env := PipelineEnvVars(nil, id, false) 232 + env := PipelineEnvVars(nil, id) 234 233 235 234 if env != nil { 236 235 t.Error("Expected nil env for nil trigger") ··· 252 251 Knot: "example.com", 253 252 Rkey: "123123", 254 253 } 255 - env := PipelineEnvVars(tr, id, false) 254 + env := PipelineEnvVars(tr, id) 256 255 257 256 // Should still have repo variables 258 257 if 
env["TANGLED_REPO_KNOT"] != "example.com" {
+86 -14
spindle/queue/queue.go
··· 1 1 package queue 2 2 3 3 import ( 4 + "slices" 4 5 "sync" 6 + 7 + "github.com/bluesky-social/indigo/atproto/syntax" 5 8 ) 6 9 7 10 type Job struct { ··· 9 12 OnFail func(error) 10 13 } 11 14 15 + type ownedJob struct { 16 + owner syntax.DID 17 + job Job 18 + } 19 + 20 + // prefers users with fewer running jobs, otherwise it's FIFO 12 21 type Queue struct { 13 - jobs chan Job 22 + mu sync.Mutex 23 + cond *sync.Cond 24 + queue []ownedJob 25 + running map[syntax.DID]int 26 + maxSize int 14 27 workers int 28 + stopped bool 15 29 wg sync.WaitGroup 16 30 } 17 31 18 32 func NewQueue(queueSize, numWorkers int) *Queue { 19 - return &Queue{ 20 - jobs: make(chan Job, queueSize), 33 + q := &Queue{ 34 + maxSize: queueSize, 21 35 workers: numWorkers, 36 + running: make(map[syntax.DID]int), 22 37 } 38 + q.cond = sync.NewCond(&q.mu) 39 + return q 23 40 } 24 41 25 - func (q *Queue) Enqueue(job Job) bool { 26 - select { 27 - case q.jobs <- job: 28 - return true 29 - default: 42 + // todo(dawn): add a per-user cap so a single user can't fill the queue 43 + func (q *Queue) Enqueue(owner syntax.DID, job Job) bool { 44 + q.mu.Lock() 45 + defer q.mu.Unlock() 46 + if q.stopped || len(q.queue) >= q.maxSize { 30 47 return false 31 48 } 49 + q.queue = append(q.queue, ownedJob{owner: owner, job: job}) 50 + q.cond.Signal() 51 + return true 32 52 } 33 53 34 54 func (q *Queue) Start() { ··· 40 60 41 61 func (q *Queue) worker() { 42 62 defer q.wg.Done() 43 - for job := range q.jobs { 44 - if err := job.Run(); err != nil { 45 - if job.OnFail != nil { 46 - job.OnFail(err) 47 - } 63 + for { 64 + picked, ok := q.takeNext() 65 + if !ok { 66 + return 67 + } 68 + 69 + err := picked.job.Run() 70 + if err != nil && picked.job.OnFail != nil { 71 + picked.job.OnFail(err) 48 72 } 73 + 74 + q.finish(picked.owner) 75 + } 76 + } 77 + 78 + // get or wait for the next job 79 + func (q *Queue) takeNext() (ownedJob, bool) { 80 + q.mu.Lock() 81 + defer q.mu.Unlock() 82 + 83 + for len(q.queue) == 0 && !q.stopped 
{ 84 + q.cond.Wait() // waiting for jobs 85 + } 86 + if q.stopped && len(q.queue) == 0 { 87 + return ownedJob{}, false // no jobs are left and the queue is stopped 88 + } 89 + 90 + idx := q.pickBest() 91 + picked := q.queue[idx] 92 + q.queue = slices.Delete(q.queue, idx, idx+1) 93 + q.running[picked.owner]++ 94 + 95 + return picked, true 96 + } 97 + 98 + // index of the queued job whose owner has the fewest currently-running jobs, 99 + // tiebreaking by arrival order. 100 + func (q *Queue) pickBest() int { 101 + best := 0 102 + for idx, job := range q.queue { 103 + if q.running[job.owner] < q.running[q.queue[best].owner] { 104 + best = idx 105 + } 106 + } 107 + return best 108 + } 109 + 110 + // called when finishing a job 111 + func (q *Queue) finish(owner syntax.DID) { 112 + q.mu.Lock() 113 + defer q.mu.Unlock() 114 + 115 + q.running[owner]-- 116 + if q.running[owner] <= 0 { 117 + delete(q.running, owner) 49 118 } 50 119 } 51 120 52 121 func (q *Queue) Stop() { 53 - close(q.jobs) 122 + q.mu.Lock() 123 + q.stopped = true 124 + q.cond.Broadcast() 125 + q.mu.Unlock() 54 126 q.wg.Wait() 55 127 }
+103
spindle/queue/queue_test.go
··· 1 + package queue 2 + 3 + import ( 4 + "sync" 5 + "testing" 6 + "time" 7 + 8 + "github.com/bluesky-social/indigo/atproto/syntax" 9 + ) 10 + 11 + const ( 12 + alice = syntax.DID("did:plc:alice") 13 + eve = syntax.DID("did:plc:eve") 14 + dawn = syntax.DID("did:plc:dawn") 15 + ) 16 + 17 + func TestQueueDrainsAllJobs(t *testing.T) { 18 + t.Parallel() 19 + 20 + q := NewQueue(10, 2) 21 + q.Start() 22 + 23 + var mu sync.Mutex 24 + var ran []string 25 + done := make(chan struct{}, 5) 26 + 27 + for _, name := range []string{"a", "b", "c", "d", "e"} { 28 + q.Enqueue(dawn, Job{Run: func() error { 29 + mu.Lock() 30 + ran = append(ran, name) 31 + mu.Unlock() 32 + done <- struct{}{} 33 + return nil 34 + }}) 35 + } 36 + 37 + for range 5 { 38 + select { 39 + case <-done: 40 + case <-time.After(time.Second): 41 + t.Fatal("timed out waiting for jobs to finish") 42 + } 43 + } 44 + 45 + q.Stop() 46 + 47 + if len(ran) != 5 { 48 + t.Fatalf("expected 5 jobs, ran %d", len(ran)) 49 + } 50 + } 51 + 52 + func TestQueueRejectsWhenFull(t *testing.T) { 53 + t.Parallel() 54 + 55 + // no workers, so the queue never drains 56 + q := NewQueue(2, 0) 57 + 58 + if !q.Enqueue(dawn, Job{Run: func() error { return nil }}) { 59 + t.Fatal("first Enqueue() returned false, want true") 60 + } 61 + if !q.Enqueue(dawn, Job{Run: func() error { return nil }}) { 62 + t.Fatal("second Enqueue() returned false, want true") 63 + } 64 + if q.Enqueue(dawn, Job{Run: func() error { return nil }}) { 65 + t.Fatal("third Enqueue() returned true on full queue, want false") 66 + } 67 + } 68 + 69 + func TestQueuePrefersOwnerWithFewestRunning(t *testing.T) { 70 + t.Parallel() 71 + 72 + // 2 workers. alice gets the first slot; while she's holding it, eve's 73 + // job should win the second slot over alice's own queued waiters. 
74 + q := NewQueue(20, 2) 75 + 76 + releaseAlice1 := make(chan struct{}) 77 + gotEve := make(chan struct{}, 1) 78 + 79 + q.Enqueue(alice, Job{Run: func() error { 80 + <-releaseAlice1 81 + return nil 82 + }}) 83 + // alice queues two more 84 + q.Enqueue(alice, Job{Run: func() error { return nil }}) 85 + q.Enqueue(alice, Job{Run: func() error { return nil }}) 86 + // eve queues one 87 + q.Enqueue(eve, Job{Run: func() error { 88 + gotEve <- struct{}{} 89 + return nil 90 + }}) 91 + 92 + q.Start() 93 + 94 + // eve should run while alice's first is still held 95 + select { 96 + case <-gotEve: 97 + case <-time.After(time.Second): 98 + t.Fatal("eve's job did not run while alice was blocked") 99 + } 100 + 101 + close(releaseAlice1) 102 + q.Stop() 103 + }
+55 -46
spindle/server.go
··· 9 9 "maps" 10 10 "net/http" 11 11 "sync" 12 + "time" 12 13 13 14 "github.com/bluesky-social/indigo/atproto/syntax" 14 15 "github.com/go-chi/chi/v5" ··· 25 26 "tangled.org/core/spindle/db" 26 27 "tangled.org/core/spindle/engine" 27 28 "tangled.org/core/spindle/engines/dummy" 29 + "tangled.org/core/spindle/engines/microvm" 28 30 "tangled.org/core/spindle/engines/nixery" 29 31 "tangled.org/core/spindle/models" 30 32 "tangled.org/core/spindle/queue" ··· 41 43 ) 42 44 43 45 type Spindle struct { 44 - jc *jetstream.JetstreamClient 45 - tap *Tap 46 - embedTap *embeddedTap 47 - db *db.DB 48 - e *rbac.Enforcer 49 - l *slog.Logger 50 - n *notifier.Notifier 51 - engs map[string]models.Engine 52 - jq *queue.Queue 53 - cfg *config.Config 54 - ks *eventconsumer.Consumer 55 - res *idresolver.Resolver 56 - vault secrets.Manager 57 - motd []byte 58 - motdMu sync.RWMutex 59 - workflowSem chan struct{} 60 - rootCtx context.Context 46 + jc *jetstream.JetstreamClient 47 + tap *Tap 48 + embedTap *embeddedTap 49 + db *db.DB 50 + e *rbac.Enforcer 51 + l *slog.Logger 52 + n *notifier.Notifier 53 + engs map[string]models.Engine 54 + jq *queue.Queue 55 + cfg *config.Config 56 + ks *eventconsumer.Consumer 57 + res *idresolver.Resolver 58 + vault secrets.Manager 59 + motd []byte 60 + motdMu sync.RWMutex 61 + rootCtx context.Context 61 62 } 62 63 63 64 // New creates a new Spindle server with the provided configuration and engines. 
64 - func New(ctx context.Context, cfg *config.Config, engines map[string]models.Engine) (*Spindle, error) { 65 + func New(ctx context.Context, cfg *config.Config, d *db.DB, engines map[string]models.Engine) (*Spindle, error) { 65 66 logger := log.FromContext(ctx) 66 67 67 - d, err := db.Make(ctx, cfg.Server.DBPath) 68 - if err != nil { 69 - return nil, fmt.Errorf("failed to setup db: %w", err) 70 - } 71 - 72 68 e, err := rbac.NewEnforcer(cfg.Server.DBPath) 73 69 if err != nil { 74 70 return nil, fmt.Errorf("failed to setup rbac enforcer: %w", err) ··· 109 105 jq := queue.NewQueue(cfg.Server.QueueSize, cfg.Server.MaxJobCount) 110 106 logger.Info("initialized queue", "queueSize", cfg.Server.QueueSize, "numWorkers", cfg.Server.MaxJobCount) 111 107 112 - workflowSem := make(chan struct{}, cfg.Server.MaxConcurrentWorkflows) 113 - logger.Info("initialized workflow semaphore", "maxConcurrentWorkflows", cfg.Server.MaxConcurrentWorkflows) 114 - 115 108 collections := []string{ 116 109 tangled.SpindleMemberNSID, 117 110 tangled.RepoNSID, ··· 145 138 resolver := idresolver.DefaultResolver(cfg.Server.PlcUrl) 146 139 147 140 spindle := &Spindle{ 148 - jc: jc, 149 - e: e, 150 - db: d, 151 - l: logger, 152 - n: &n, 153 - engs: engines, 154 - jq: jq, 155 - cfg: cfg, 156 - res: resolver, 157 - vault: vault, 158 - motd: defaultMotd, 159 - workflowSem: workflowSem, 160 - rootCtx: ctx, 141 + jc: jc, 142 + e: e, 143 + db: d, 144 + l: logger, 145 + n: &n, 146 + engs: engines, 147 + jq: jq, 148 + cfg: cfg, 149 + res: resolver, 150 + vault: vault, 151 + motd: defaultMotd, 152 + rootCtx: ctx, 161 153 } 162 154 163 155 err = e.AddSpindle(rbacDomain) ··· 185 177 // job in the above registered queue. 
186 178 ccfg := eventconsumer.NewConsumerConfig() 187 179 ccfg.Logger = log.SubLogger(logger, "eventconsumer") 188 - ccfg.URLFunc = eventconsumer.DefaultURL(cfg.Server.Dev) 189 180 ccfg.ProcessFunc = spindle.processPipeline 190 181 ccfg.CursorStore = cursorStore 182 + if cfg.Server.Dev { 183 + ccfg.RetryInterval = 5 * time.Second 184 + ccfg.MaxRetryInterval = 10 * time.Second 185 + } else { 186 + ccfg.RetryInterval = 1 * time.Minute 187 + ccfg.MaxRetryInterval = 10 * time.Minute 188 + } 191 189 knownKnots, err := d.Knots() 192 190 if err != nil { 193 191 return nil, err ··· 330 328 return fmt.Errorf("failed to load config: %w", err) 331 329 } 332 330 331 + d, err := db.Make(ctx, cfg.Server.DBPath) 332 + if err != nil { 333 + return fmt.Errorf("failed to setup db: %w", err) 334 + } 335 + 333 336 nixeryEng, err := nixery.New(ctx, cfg) 334 337 if err != nil { 335 338 return err 336 339 } 337 340 338 - s, err := New(ctx, cfg, map[string]models.Engine{ 339 - "nixery": nixeryEng, 340 - "dummy": dummy.New(log.FromContext(ctx)), 341 + microvmEng, err := microvm.New(ctx, cfg, d) 342 + if err != nil { 343 + return err 344 + } 345 + 346 + s, err := New(ctx, cfg, d, map[string]models.Engine{ 347 + "nixery": nixeryEng, 348 + "microvm": microvmEng, 349 + "dummy": dummy.New(log.FromContext(ctx)), 341 350 }) 342 351 if err != nil { 343 352 return err ··· 413 422 workflows := make(map[models.Engine][]models.Workflow) 414 423 415 424 // Build pipeline environment variables once for all workflows 416 - pipelineEnv := models.PipelineEnvVars(tpl.TriggerMetadata, pipelineId, s.cfg.Server.Dev) 425 + pipelineEnv := models.PipelineEnvVars(tpl.TriggerMetadata, pipelineId) 417 426 418 427 for _, w := range tpl.Workflows { 419 428 if w != nil { ··· 467 476 } 468 477 } 469 478 470 - ok := s.jq.Enqueue(queue.Job{ 479 + ok := s.jq.Enqueue(repoDid, queue.Job{ 471 480 Run: func() error { 472 - engine.StartWorkflows(log.SubLogger(s.l, "engine"), s.vault, s.cfg, s.db, s.n, s.workflowSem, ctx, 
&models.Pipeline{ 481 + engine.StartWorkflows(log.SubLogger(s.l, "engine"), s.vault, s.cfg, s.db, s.n, ctx, &models.Pipeline{ 473 482 RepoDid: repoDid, 474 483 Workflows: workflows, 475 484 }, pipelineId)