Monorepo for Tangled tangled.org
6

Configure Feed

Select the types of activity you want to include in your feed.

1package microvm 2 3import ( 4 "context" 5 _ "embed" 6 "encoding/json" 7 "errors" 8 "fmt" 9 "log/slog" 10 "os" 11 "os/exec" 12 "path/filepath" 13 "strconv" 14 "strings" 15 "sync" 16 "time" 17 18 "github.com/digitalocean/go-qemu/qmp" 19 "github.com/google/uuid" 20) 21 22const ( 23 defaultQMPTimeout = 10 * time.Second 24 outerSlirpCIDR = "10.0.2.0/24" 25 innerSlirpNet = "10.0.3.0/24" 26 innerSlirpHost = "10.0.3.2" 27 innerSlirpDNS = "10.0.3.3" 28 innerSlirpDHCP = "10.0.3.15" 29 netnsTapName = "tap0" 30 netnsMTU = "65520" 31) 32 33type QEMUConfig struct { 34 Image ImageSpec 35 BootTimeout time.Duration 36 CID uint32 37 EnableKVM bool 38 QEMULogPath string 39 QMPPath string 40 SerialLogPath string 41 WorkDir string 42 VolumePaths map[string]string 43 VolumeBaseName string 44 Cgroup CgroupLimits 45 Dev bool 46} 47 48type QEMUVMHandle struct { 49 cid uint32 50 Process *os.Process 51 qemuLogPath string 52 QMPMon *qmp.SocketMonitor 53 QMPPath string 54 serialLogPath string 55 workDir string 56 57 cmd *exec.Cmd 58 done chan struct{} 59 qemuLogFile *os.File 60 cgroup *CgroupHandle 61 slirpCmd *exec.Cmd 62 slirpExit *os.File 63 waitErr error 64 waitErrMu sync.Mutex 65} 66 67type qemuRunner struct{} 68 69func (qemuRunner) Validate(spec ImageSpec, enableKVM bool) error { 70 if _, err := exec.LookPath(spec.RunnerCmd()); err != nil { 71 return fmt.Errorf("required host command %q not found in PATH: %w", spec.RunnerCmd(), err) 72 } 73 if _, err := os.Stat("/dev/vhost-vsock"); err != nil { 74 return fmt.Errorf("microvm requires /dev/vhost-vsock for vhost-vsock-device: %w", err) 75 } 76 if enableKVM { 77 if _, err := os.Stat("/dev/kvm"); err != nil { 78 return fmt.Errorf("microvm KVM was requested but /dev/kvm is not accessible: %w", err) 79 } 80 } 81 if len(spec.NetworkInterfaces) > 0 { 82 if _, err := os.Stat("/dev/net/tun"); err != nil { 83 return fmt.Errorf("microvm slirp4netns networking requires /dev/net/tun: %w", err) 84 } 85 for _, cmd := range []string{"ip", "mount", "slirp4netns", "unshare"} { 86 if _, err := exec.LookPath(cmd); err != nil { 87 return fmt.Errorf("required host command %q not found in PATH: %w", cmd, err) 88 } 89 } 90 } 91 return nil 92} 93 94func (qemuRunner) Start(ctx context.Context, cfg VMConfig, volumePaths map[string]string, logger *slog.Logger) (VMHandle, error) { 95 bootTimeout := cfg.BootTimeout 96 if bootTimeout == 0 { 97 bootTimeout = 10 * time.Second 98 } 99 return StartQEMU(ctx, QEMUConfig{ 100 Image: cfg.Image, 101 BootTimeout: bootTimeout, 102 CID: cfg.CID, 103 EnableKVM: cfg.EnableKVM, 104 WorkDir: cfg.WorkDir, 105 VolumePaths: volumePaths, 106 Cgroup: cfg.Cgroup, 107 Dev: cfg.Dev, 108 }, logger) 109} 110 111func StartQEMU(ctx context.Context, cfg QEMUConfig, logger *slog.Logger) (VMHandle, error) { 112 if logger == nil { 113 logger = slog.Default() 114 } 115 116 workDir := cfg.WorkDir 117 118 handle := &QEMUVMHandle{ 119 workDir: workDir, 120 } 121 122 var ok bool 123 defer func() { 124 if !ok { 125 _ = handle.Close() 126 } 127 }() 128 129 cid := cfg.CID 130 if cid == 0 { 131 var err error 132 cid, err = AllocateCID() 133 if err != nil { 134 return nil, err 135 } 136 } 137 if cid < minGuestCID { 138 return nil, fmt.Errorf("guest CID must be >= %d", minGuestCID) 139 } 140 handle.cid = cid 141 142 volumePaths := cfg.VolumePaths 143 144 qemuLogPath := cfg.QEMULogPath 145 if qemuLogPath == "" { 146 qemuLogPath = filepath.Join(workDir, "qemu.log") 147 } 148 qemuLogFile, err := createParentedFile(qemuLogPath) 149 if err != nil { 150 return nil, err 151 } 152 handle.qemuLogPath = qemuLogPath 153 handle.qemuLogFile = qemuLogFile 154 155 serialLogPath := cfg.SerialLogPath 156 if serialLogPath == "" { 157 serialLogPath = filepath.Join(workDir, "serial.log") 158 } 159 if err := os.MkdirAll(filepath.Dir(serialLogPath), 0o755); err != nil { 160 return nil, fmt.Errorf("create serial log directory: %w", err) 161 } 162 handle.serialLogPath = serialLogPath 163 164 qmpPath := cfg.QMPPath 165 if qmpPath == "" { 166 qmpPath = filepath.Join(workDir, "qmp.sock") 167 } 168 handle.QMPPath = qmpPath 169 170 qemuCmd := cfg.Image.RunnerCmd() 171 qemuBinary, err := exec.LookPath(qemuCmd) 172 if err != nil { 173 return nil, fmt.Errorf("%s command not found in PATH: %w", qemuCmd, err) 174 } 175 176 args, err := qemuArgs(qemuArgsConfig{ 177 Image: cfg.Image, 178 CID: cid, 179 EnableKVM: cfg.EnableKVM, 180 QMPPath: qmpPath, 181 SerialLogPath: serialLogPath, 182 VolumePaths: volumePaths, 183 }) 184 if err != nil { 185 return nil, err 186 } 187 188 cmd, slirpNet, err := qemuCommand(ctx, qemuBinary, args, cfg.Image, workDir, cfg.Dev) 189 if err != nil { 190 return nil, err 191 } 192 cmd.Env = append(os.Environ(), "TMPDIR="+workDir) 193 cmd.Stdout = qemuLogFile 194 cmd.Stderr = qemuLogFile 195 196 cgroup, err := prepareCgroup(cfg.Cgroup, logger) 197 if err != nil { 198 return nil, err 199 } 200 handle.cgroup = cgroup 201 202 logger.Info("starting qemu microvm", "cid", cid, "workDir", workDir, "serialLog", serialLogPath, "qmp", qmpPath) 203 if err := cmd.Start(); err != nil { 204 return nil, fmt.Errorf("starting qemu: %w", err) 205 } 206 handle.cmd = cmd 207 handle.Process = cmd.Process 208 handle.done = make(chan struct{}) 209 go func() { 210 err := cmd.Wait() 211 handle.waitErrMu.Lock() 212 handle.waitErr = err 213 handle.waitErrMu.Unlock() 214 close(handle.done) 215 }() 216 217 if err := cgroup.AddProcess(cmd.Process.Pid, logger); err != nil { 218 return nil, err 219 } 220 221 if slirpNet != nil { 222 handle.slirpCmd, handle.slirpExit, err = slirpNet.Start(ctx, qemuLogFile, logger) 223 if err != nil { 224 return nil, err 225 } 226 if handle.slirpCmd != nil && handle.slirpCmd.Process != nil { 227 if err := cgroup.AddProcess(handle.slirpCmd.Process.Pid, logger); err != nil { 228 return nil, err 229 } 230 } 231 } 232 233 qmpTimeout := cfg.BootTimeout 234 if qmpTimeout == 0 { 235 qmpTimeout = defaultQMPTimeout 236 } 237 if err := handle.waitForQMP(ctx, qmpTimeout); err != nil { 238 return nil, err 239 } 240 241 status, err := handle.QMPQueryStatus() 242 if err != nil { 243 return nil, err 244 } 245 if status != "running" { 246 return nil, fmt.Errorf("qemu guest not running (status: %s)", status) 247 } 248 logger.Info("qemu microvm running", "cid", cid, "status", status) 249 250 ok = true 251 return handle, nil 252} 253 254func (h *QEMUVMHandle) Wait() error { 255 if h == nil || h.done == nil { 256 return nil 257 } 258 <-h.done 259 h.waitErrMu.Lock() 260 defer h.waitErrMu.Unlock() 261 return h.waitErr 262} 263 264func (h *QEMUVMHandle) WaitContext(ctx context.Context) error { 265 if h == nil || h.done == nil { 266 return nil 267 } 268 select { 269 case <-h.done: 270 h.waitErrMu.Lock() 271 defer h.waitErrMu.Unlock() 272 return h.waitErr 273 case <-ctx.Done(): 274 return ctx.Err() 275 } 276} 277 278func (h *QEMUVMHandle) Kill() error { 279 if h == nil || h.Process == nil { 280 return nil 281 } 282 return h.Process.Kill() 283} 284 285func (h *QEMUVMHandle) Shutdown(ctx context.Context) error { 286 if h == nil { 287 return nil 288 } 289 if h.QMPMon != nil { 290 if err := h.QMPSystemPowerdown(); err != nil { 291 return err 292 } 293 } 294 if h.done == nil { 295 return nil 296 } 297 select { 298 case <-h.done: 299 return h.Wait() 300 case <-ctx.Done(): 301 _ = h.Kill() 302 _ = h.Wait() 303 return ctx.Err() 304 } 305} 306 307func (h *QEMUVMHandle) Close() error { 308 if h == nil { 309 return nil 310 } 311 312 var closeErr error 313 if h.QMPMon != nil { 314 closeErr = errors.Join(closeErr, h.QMPMon.Disconnect()) 315 h.QMPMon = nil 316 } 317 if h.Process != nil { 318 _ = h.Process.Kill() 319 _ = h.Wait() 320 } 321 if h.slirpExit != nil { 322 _ = h.slirpExit.Close() 323 h.slirpExit = nil 324 } 325 if h.slirpCmd != nil && h.slirpCmd.Process != nil { 326 _ = h.slirpCmd.Process.Kill() 327 _ = h.slirpCmd.Wait() 328 h.slirpCmd = nil 329 } 330 if h.qemuLogFile != nil { 331 closeErr = errors.Join(closeErr, h.qemuLogFile.Close()) 332 h.qemuLogFile = nil 333 } 334 if h.cgroup != nil { 335 closeErr = errors.Join(closeErr, h.cgroup.Close()) 336 h.cgroup = nil 337 } 338 return closeErr 339} 340 341func (h *QEMUVMHandle) QMPRun(command qmp.Command) ([]byte, error) { 342 if h == nil || h.QMPMon == nil { 343 return nil, fmt.Errorf("qmp monitor is not connected") 344 } 345 data, err := json.Marshal(command) 346 if err != nil { 347 return nil, err 348 } 349 return h.QMPMon.Run(data) 350} 351 352func (h *QEMUVMHandle) QMPQueryStatus() (string, error) { 353 raw, err := h.QMPRun(qmp.Command{Execute: "query-status"}) 354 if err != nil { 355 return "", fmt.Errorf("qmp query-status failed: %w", err) 356 } 357 358 var resp struct { 359 Return struct { 360 Status string `json:"status"` 361 } `json:"return"` 362 } 363 if err := json.Unmarshal(raw, &resp); err != nil { 364 return "", fmt.Errorf("qmp query-status parse: %w", err) 365 } 366 return resp.Return.Status, nil 367} 368 369func (h *QEMUVMHandle) QMPSystemPowerdown() error { 370 _, err := h.QMPRun(qmp.Command{Execute: "system_powerdown"}) 371 return err 372} 373 374func (h *QEMUVMHandle) Logs() VMLogs { 375 if h == nil { 376 return VMLogs{} 377 } 378 return VMLogs{ 379 Serial: h.serialLogPath, 380 Extra: map[string]string{ 381 "qemu": h.qemuLogPath, 382 }, 383 } 384} 385 386func (h *QEMUVMHandle) CID() uint32 { 387 if h == nil { 388 return 0 389 } 390 return h.cid 391} 392 393func (h *QEMUVMHandle) WorkDir() string { 394 if h == nil { 395 return "" 396 } 397 return h.workDir 398} 399 400func (h *QEMUVMHandle) OOMKilled() bool { 401 if h == nil { 402 return false 403 } 404 return h.cgroup.OOMKilled() 405} 406 407func (h *QEMUVMHandle) waitForQMP(ctx context.Context, timeout time.Duration) error { 408 qmpCtx, cancel := context.WithTimeout(ctx, timeout) 409 defer cancel() 410 411 var lastErr error 412 for { 413 mon, err := qmp.NewSocketMonitor("unix", h.QMPPath, 2*time.Second) 414 if err == nil { 415 if err = mon.Connect(); err == nil { 416 h.QMPMon = mon 417 return nil 418 } 419 _ = mon.Disconnect() 420 } 421 lastErr = err 422 423 select { 424 case <-qmpCtx.Done(): 425 return fmt.Errorf("qmp connect timeout: %w", lastErr) 426 case <-h.done: 427 return fmt.Errorf("qemu exited before qmp was ready: %w", h.Wait()) 428 case <-time.After(25 * time.Millisecond): 429 } 430 } 431} 432 433func qemuCommand( 434 ctx context.Context, 435 qemuBinary string, 436 args []string, 437 spec ImageSpec, 438 workDir string, 439 dev bool, 440) (*exec.Cmd, *slirpNamespace, error) { 441 if len(spec.NetworkInterfaces) == 0 { 442 return exec.CommandContext(ctx, qemuBinary, args...), nil, nil 443 } 444 445 ipPath, err := exec.LookPath("ip") 446 if err != nil { 447 return nil, nil, fmt.Errorf("ip command not found in PATH: %w", err) 448 } 449 mountPath, err := exec.LookPath("mount") 450 if err != nil { 451 return nil, nil, fmt.Errorf("mount command not found in PATH: %w", err) 452 } 453 unsharePath, err := exec.LookPath("unshare") 454 if err != nil { 455 return nil, nil, fmt.Errorf("unshare command not found in PATH: %w", err) 456 } 457 458 pidFile, resolvPath, wrapperPath, err := prepareQEMUNetnsFiles(workDir, dev) 459 if err != nil { 460 return nil, nil, err 461 } 462 463 cmdArgs := append([]string{ 464 "--user", 465 "--map-root-user", 466 "--net", 467 "--mount", 468 "--propagation", "private", 469 "--", 470 wrapperPath, 471 pidFile, 472 ipPath, 473 mountPath, 474 resolvPath, 475 qemuBinary, 476 }, args...) 477 478 cmd := exec.CommandContext(ctx, unsharePath, cmdArgs...) 479 480 return cmd, &slirpNamespace{ 481 spec: spec, 482 pidFile: pidFile, 483 dev: dev, 484 }, nil 485} 486 487func prepareQEMUNetnsFiles(workDir string, dev bool) (pidFile, resolvPath, wrapperPath string, err error) { 488 pidFile = filepath.Join(workDir, "qemu-netns.pid") 489 resolvPath = filepath.Join(workDir, "qemu-netns-resolv.conf") 490 wrapperPath = filepath.Join(workDir, "qemu-netns-wrapper") 491 492 // the guest resolves through shuttle on 127.0.0.1. keep qemu's slirp DNS 493 // pointed at an unroutable local resolver inside this network namespace so 494 // direct guest queries to 10.0.3.3 don't bypass the shuttle dns policy. 495 if err := os.WriteFile(resolvPath, []byte("nameserver 127.0.0.1\n"), 0o644); err != nil { 496 return "", "", "", fmt.Errorf("write qemu network namespace resolv.conf: %w", err) 497 } 498 499 if err := writeNetnsWrapper(wrapperPath, dev); err != nil { 500 return "", "", "", fmt.Errorf("write qemu network namespace wrapper: %w", err) 501 } 502 503 return pidFile, resolvPath, wrapperPath, nil 504} 505 506type qemuArgsConfig struct { 507 Image ImageSpec 508 CID uint32 509 EnableKVM bool 510 QMPPath string 511 SerialLogPath string 512 VolumePaths map[string]string 513} 514 515func qemuArgs(cfg qemuArgsConfig) ([]string, error) { 516 uuid := uuid.New() 517 518 b := newArgBuilder(64) 519 520 addQEMUMachineArgs(&b, cfg, uuid) 521 addQEMUStoreArgs(&b, cfg) 522 523 if cfg.EnableKVM { 524 addQEMUKVMArgs(&b, cfg.Image) 525 } 526 527 if err := addQEMUVolumeArgs(&b, cfg); err != nil { 528 return nil, err 529 } 530 531 if err := addQEMUNetworkArgs(&b, cfg.Image.NetworkInterfaces); err != nil { 532 return nil, err 533 } 534 535 b.Optf("-device", "vhost-vsock-device,guest-cid=%d", cfg.CID) 536 537 if len(cfg.Image.RunnerConfig.ExtraArgs) > 0 { 538 b.Add(cfg.Image.RunnerConfig.ExtraArgs...) 539 } 540 541 return b.Args(), nil 542} 543 544func addQEMUMachineArgs(b *argBuilder, cfg qemuArgsConfig, uuid uuid.UUID) { 545 if cfg.Image.RunnerConfig.Machine != "" { 546 b.Opt("-M", cfg.Image.RunnerConfig.Machine) 547 } 548 b.Optf("-m", "%dM", cfg.Image.MemoryMiB) 549 b.Opt("-smp", strconv.Itoa(cfg.Image.VCPUs)) 550 551 b.Add( 552 "-nodefaults", 553 "-no-user-config", 554 "-no-reboot", 555 ) 556 557 b.Opt("-kernel", cfg.Image.Kernel) 558 b.Opt("-initrd", cfg.Image.Initrd) 559 560 b.Opt("-device", "virtio-rng-device") 561 562 b.Optf("-smbios", "type=1,uuid=%s", uuid) 563 b.Opt("-serial", "file:"+cfg.SerialLogPath) 564 565 // use virtio console if requsted. this is faster than the serial UART logging 566 // because serial has a higher cost when being accesssed. we still have to 567 // support serial itself for early kernel boot but thats OK. 568 if cfg.Image.RunnerConfig.Console == "hvc0" { 569 b.Optf("-chardev", "file,id=virtiocon0,path=%s,append=on", cfg.SerialLogPath) 570 b.Add("-device", "virtio-serial-device") 571 b.Opt("-device", "virtconsole,chardev=virtiocon0") 572 } 573 b.Opt("-display", "none") 574 b.Opt("-monitor", "none") 575 b.Opt("-append", cfg.Image.BootArgs) 576 577 b.Opt("-sandbox", "on") 578 b.Optf("-qmp", "unix:%s,server,nowait", cfg.QMPPath) 579} 580 581func addQEMUStoreArgs(b *argBuilder, cfg qemuArgsConfig) { 582 drive := newOptionBuilder(8) 583 drive.KV("id", "store") 584 drive.KV("format", "raw") 585 drive.Add("read-only=on") 586 drive.KV("file", cfg.Image.StoreDisk) 587 drive.Add("if=none") 588 drive.Add("aio=io_uring") 589 590 b.Opt("-drive", drive.String()) 591 b.Opt("-device", "virtio-blk-device,drive=store") 592} 593 594func addQEMUKVMArgs(b *argBuilder, image ImageSpec) { 595 b.Flag("-enable-kvm") 596 if image.RunnerConfig.CPU != "" { 597 b.Opt("-cpu", image.RunnerConfig.CPU) 598 } 599 b.Opt("-device", "i8042") 600} 601 602func addQEMUVolumeArgs(b *argBuilder, cfg qemuArgsConfig) error { 603 for index, volume := range cfg.Image.Volumes { 604 path := cfg.VolumePaths[volume.Image] 605 if path == "" { 606 return fmt.Errorf("missing prepared path for volume %q", volume.Image) 607 } 608 609 driveID := fmt.Sprintf("volume%d", index) 610 611 drive := newOptionBuilder(10) 612 drive.KV("id", driveID) 613 drive.KV("format", "raw") 614 drive.Add("read-only=off") 615 drive.KV("file", path) 616 drive.Add("if=none") 617 drive.Add("aio=io_uring") 618 drive.Add("discard=unmap") 619 drive.Add("cache=none") 620 621 b.Opt("-drive", drive.String()) 622 b.Optf("-device", "virtio-blk-device,drive=%s", driveID) 623 } 624 625 return nil 626} 627 628func addQEMUNetworkArgs(b *argBuilder, interfaces []NetworkInterface) error { 629 for _, networkInterface := range interfaces { 630 if networkInterface.Type != "slirp4netns" { 631 return fmt.Errorf("unsupported microvm network interface type %q", networkInterface.Type) 632 } 633 634 netdevOpts := newOptionBuilder(6) 635 netdevOpts.Add("user") 636 netdevOpts.KV("id", networkInterface.ID) 637 netdevOpts.KV("net", innerSlirpNet) 638 netdevOpts.KV("host", innerSlirpHost) 639 netdevOpts.KV("dns", innerSlirpDNS) 640 netdevOpts.KV("dhcpstart", innerSlirpDHCP) 641 642 b.Opt("-netdev", netdevOpts.String()) 643 b.Optf( 644 "-device", "virtio-net-device,netdev=%s,mac=%s", 645 networkInterface.ID, networkInterface.MAC, 646 ) 647 } 648 649 return nil 650} 651 652func waitForPIDFile(ctx context.Context, path string) (string, error) { 653 waitCtx, cancel := context.WithTimeout(ctx, 5*time.Second) 654 defer cancel() 655 656 ticker := time.NewTicker(25 * time.Millisecond) 657 defer ticker.Stop() 658 659 for { 660 data, err := os.ReadFile(path) 661 if err == nil { 662 pid := strings.TrimSpace(string(data)) 663 if pid != "" { 664 return pid, nil 665 } 666 } else if !errors.Is(err, os.ErrNotExist) { 667 return "", fmt.Errorf("read qemu network namespace pid: %w", err) 668 } 669 670 select { 671 case <-waitCtx.Done(): 672 return "", fmt.Errorf("waiting for qemu network namespace pid: %w", waitCtx.Err()) 673 case <-ticker.C: 674 } 675 } 676}