···1010 "log/slog"
1111 "net"
1212 "os"
1313+ "path/filepath"
1314 "time"
14151516 "github.com/mdlayher/vsock"
···7475 Usage: "timeout for the guest command",
7576 },
7677 &cli.DurationFlag{
7777- Name: "cache-drain-timeout",
7878- Value: 5 * time.Minute,
7979- Usage: "how long to wait for queued cache uploads after the guest command exits",
7878+ Name: "cache-upload-wait-timeout",
7979+ Aliases: []string{"cache-drain-timeout"},
8080+ Value: 5 * time.Minute,
8181+ Usage: "how long to wait for guest cache uploads to finish after the command exits",
8082 },
8183 &cli.DurationFlag{
8284 Name: "shutdown-timeout",
···177179 }
178180 jobID := "spindle-microvm-run"
179181 execID := "dev-1"
182182+ var pendingConfigKey string
183183+ var pendingConfigToplevel string
184184+ var configCacheDB *db.DB
180185181186 fmt.Fprintf(os.Stderr, "listening for agent on %s\n", ln.Addr())
182187 conn, err := acceptExpectedVsockConn(ln, vm.CID(), logger)
···203208 var uploadCache *microvm.UploadCacheProxy
204209 if cmd.String("cache-upload-url") != "" {
205210 var err error
206206- uploadCache, err = microvm.StartUploadCacheProxy(ctx, vm.CID(), cmd.String("cache-upload-url"), upstreams, logger)
211211+ uploadCache, err = microvm.StartUploadCacheProxy(ctx, vm.CID(), cmd.String("cache-upload-url"), upstreams, filepath.Join(vm.WorkDir(), "upload-cache"), logger)
207212 if err != nil {
208213 return fmt.Errorf("start upload cache proxy: %w", err)
209214 }
···243248 return fmt.Errorf("calculate base config hash: %w", err)
244249 }
245250246246- var d *db.DB
247251 var configKey string
248252 var cachedToplevel string
249253 if cmd.String("db") != "" {
250250- d, err = db.Make(ctx, cmd.String("db"))
254254+ configCacheDB, err = db.Make(ctx, cmd.String("db"))
251255 if err != nil {
252256 return fmt.Errorf("failed to open database: %w", err)
253257 }
254254- defer d.Close()
258258+ defer configCacheDB.Close()
255259256260 configKey, err = microvm.BuildConfigKey(imageSpec, cmd.String("activate-config"))
257261 if err != nil {
258262 return fmt.Errorf("calculate config key: %w", err)
259263 }
260264261261- record, err := d.GetNixOSToplevelCacheRecord(configKey)
265265+ record, err := configCacheDB.GetNixOSToplevelCacheRecord(configKey)
262266 if err != nil {
263267 if !errors.Is(err, sql.ErrNoRows) {
264268 return fmt.Errorf("lookup config cache: %w", err)
···280284 }
281285 fmt.Fprintf(os.Stderr, "activated config toplevel: %s\n", result.Toplevel)
282286283283- if d != nil && cachedToplevel == "" && result.Toplevel != "" && configKey != "" {
284284- err = d.SaveNixOSToplevelCacheRecord(configKey, result.Toplevel)
285285- if err != nil {
286286- return fmt.Errorf("save config cache: %w", err)
287287+ if configCacheDB != nil && cachedToplevel == "" && result.Toplevel != "" && configKey != "" {
288288+ if uploadCache == nil {
289289+ fmt.Fprintln(os.Stderr, "skipping config cache metadata commit: no cache upload url configured")
290290+ } else {
291291+ pendingConfigKey = configKey
292292+ pendingConfigToplevel = result.Toplevel
287293 }
288294 }
289295 }
···302308 }
303309304310 if uploadCache != nil {
305305- drainCtx := ctx
306306- if cmd.Duration("cache-drain-timeout") > 0 {
311311+ uploadWaitCtx := ctx
312312+ if cmd.Duration("cache-upload-wait-timeout") > 0 {
307313 var cancel context.CancelFunc
308308- drainCtx, cancel = context.WithTimeout(ctx, cmd.Duration("cache-drain-timeout"))
314314+ uploadWaitCtx, cancel = context.WithTimeout(ctx, cmd.Duration("cache-upload-wait-timeout"))
309315 defer cancel()
310316 }
311311- uploaded, err := session.Drain(drainCtx)
317317+ uploaded, err := session.Drain(uploadWaitCtx)
312318 if err != nil {
313319 return err
314320 }
315321 fmt.Printf("cache uploaded: %d\n", uploaded)
322322+ if configCacheDB != nil && pendingConfigKey != "" && pendingConfigToplevel != "" {
323323+ if err := configCacheDB.SaveNixOSToplevelCacheRecord(pendingConfigKey, pendingConfigToplevel); err != nil {
324324+ return fmt.Errorf("save config cache: %w", err)
325325+ }
326326+ }
316327 }
317328318329 // mirror the engine shutdown order: ask the agent to power off first,
+30-3
spindle/engines/microvm/README.md
···188188Teardown is the same whether the workflow succeeded, failed or timed out: drain the
189189guest's pending Nix cache uploads, ask the agent to power off and wait for QEMU
190190to exit (falling back to QMP `system_powerdown` and finally a kill if it
191191-doesn't), then close the proxies and remove the work directory.
191191+doesn't), then close the proxies and remove the work directory. For non-HTTP
192192+upload targets, the host-side import has already happened synchronously when the
193193+guest committed each narinfo, so there is no second host-side cache drain step
194194+at teardown.
192195193196### Nix cache
194197···204207The upload proxy goes the other way: paths built inside the guest are pushed to
205208spindle's configured upload cache (if any) so the next workflow that needs them
206209doesn't rebuild. Paths already present on any configured read cache are skipped.
207207-The agent queues built paths and they're uploaded eagerly as they appear; any
208208-still in flight at teardown block the drain step until they finish.
210210+211211+For `http://` and `https://` upload targets the proxy just reverse-proxies the
212212+guest's binary-cache upload traffic to the configured remote cache, while still
213213+answering narinfo existence checks across the upload target plus the read
214214+caches.
215215+216216+For `ssh://`, `ssh-ng://`, `daemon`, and `local` targets spindle implements the
217217+small HTTP binary-cache upload surface itself. It stages uploaded `nar/` objects
218218+and narinfos under the workflow workdir, validates the narinfo, then treats the
219219+narinfo upload as the commit point: once `<hash>.narinfo` is written spindle
220220+runs:
221221+222222+```bash
223223+nix copy \
224224+ --from file://<staging-dir> \
225225+ --to <target-store> \
226226+ --no-check-sigs \
227227+ --substitute-on-destination \
228228+ <store-path>
229229+```
230230+231231+That copy is synchronous. If it fails, spindle removes the staged narinfo again
232232+so future `GET`/`HEAD <hash>.narinfo` requests do not falsely dedupe a path that
233233+never made it to the destination store. The guest still only ever sees the same
234234+HTTP binary-cache upload protocol over vsock; it never gets direct access to
235235+SSH credentials or the destination store itself.