Monorepo for Tangled tangled.org
2

Configure Feed

Select the types of activity you want to include in your feed.

1package microvm 2 3import ( 4 "fmt" 5 "log/slog" 6 "os" 7 "path/filepath" 8 "regexp" 9 "strings" 10 11 cgroups "github.com/containerd/cgroups/v3" 12 "github.com/containerd/cgroups/v3/cgroup2" 13 "github.com/prometheus/procfs" 14) 15 16var ( 17 cgroupInvalidChar = regexp.MustCompile(`[^a-zA-Z0-9\-_.]`) 18 cgroupConsecutiveSep = regexp.MustCompile(`[-_.]{2,}`) 19) 20 21const ( 22 cgroupParentSelf = "self" 23 supervisorCgroupName = "supervisor" 24) 25 26type CgroupLimits struct { 27 Enabled bool 28 Parent *CgroupParent 29 Name string 30 MemoryMaxMiB int64 31 SwapMaxMiB *int64 32 PidsMax int64 33} 34 35type CgroupParent struct { 36 root *cgroup2.Manager 37 mountpoint string 38 group string 39} 40 41type CgroupHandle struct { 42 manager *cgroup2.Manager 43} 44 45func initCgroupParent(parent string, supervisorMemoryMinMiB int64, logger *slog.Logger) (*CgroupParent, error) { 46 if parent == "" { 47 parent = cgroupParentSelf 48 } 49 if cgroups.Mode() != cgroups.Unified { 50 return nil, fmt.Errorf("microVM cgroups require cgroup v2 unified mode") 51 } 52 53 mountpoint, group, err := resolveCgroupParent(parent) 54 if err != nil { 55 return nil, err 56 } 57 if _, err := os.Stat(filepath.Join(mountpoint, strings.TrimPrefix(group, "/"))); err != nil { 58 return nil, fmt.Errorf("stat cgroup parent %q:%q: %w", mountpoint, group, err) 59 } 60 61 root, err := cgroup2.Load(group, cgroup2.WithMountpoint(mountpoint)) 62 if err != nil { 63 return nil, fmt.Errorf("load cgroup parent %q:%q: %w", mountpoint, group, err) 64 } 65 66 if group != "/" { 67 if err := moveParentProcesses(root, supervisorMemoryMinMiB, logger); err != nil { 68 return nil, err 69 } 70 } 71 72 if logger != nil { 73 logger.Info("initialized microVM cgroup parent", "mountpoint", mountpoint, "group", group) 74 } 75 return &CgroupParent{root: root, mountpoint: mountpoint, group: group}, nil 76} 77 78func prepareCgroup(limits CgroupLimits, logger *slog.Logger) (*CgroupHandle, error) { 79 if !limits.Enabled { 80 return nil, nil 81 } 82 if limits.Parent == nil || limits.Parent.root == nil { 83 return nil, fmt.Errorf("cgroup parent is not initialized") 84 } 85 name := sanitizeCgroupName(limits.Name) 86 if name == "" { 87 return nil, fmt.Errorf("cgroup name is empty") 88 } 89 90 manager, err := limits.Parent.root.NewChild(name, cgroupResources(limits)) 91 if err != nil { 92 return nil, fmt.Errorf("create cgroup %q: %w", name, err) 93 } 94 95 if logger != nil { 96 logger.Info("created microVM cgroup", "name", name, "parentGroup", limits.Parent.group) 97 } 98 return &CgroupHandle{manager: manager}, nil 99} 100 101func cgroupResources(limits CgroupLimits) *cgroup2.Resources { 102 resources := &cgroup2.Resources{} 103 if limits.MemoryMaxMiB > 0 || limits.SwapMaxMiB != nil { 104 memory := &cgroup2.Memory{} 105 if limits.MemoryMaxMiB > 0 { 106 maxBytes := limits.MemoryMaxMiB * 1024 * 1024 107 memory.Max = &maxBytes 108 } 109 if limits.SwapMaxMiB != nil { 110 swapBytes := *limits.SwapMaxMiB * 1024 * 1024 111 memory.Swap = &swapBytes 112 } 113 oomGroup := true 114 memory.OOMGroup = &oomGroup 115 resources.Memory = memory 116 } 117 if limits.PidsMax > 0 { 118 resources.Pids = &cgroup2.Pids{Max: limits.PidsMax} 119 } 120 return resources 121} 122 123func supervisorResources(memoryMinMiB int64) *cgroup2.Resources { 124 if memoryMinMiB <= 0 { 125 return nil 126 } 127 minBytes := memoryMinMiB * 1024 * 1024 128 return &cgroup2.Resources{ 129 Memory: &cgroup2.Memory{Min: &minBytes}, 130 } 131} 132 133func (h *CgroupHandle) AddProcess(pid int, logger *slog.Logger) error { 134 if h == nil || h.manager == nil { 135 return nil 136 } 137 if pid <= 0 { 138 return fmt.Errorf("invalid pid %d", pid) 139 } 140 if err := h.manager.AddProc(uint64(pid)); err != nil { 141 return fmt.Errorf("add pid %d to cgroup: %w", pid, err) 142 } 143 if logger != nil { 144 logger.Info("added process to microVM cgroup", "pid", pid) 145 } 146 return nil 147} 148 149func (h *CgroupHandle) Close() error { 150 if h == nil || h.manager == nil { 151 return nil 152 } 153 return h.manager.Delete() 154} 155 156func (h *CgroupHandle) OOMKilled() bool { 157 if h == nil || h.manager == nil { 158 return false 159 } 160 metrics, err := h.manager.Stat() 161 if err != nil || metrics == nil || metrics.MemoryEvents == nil { 162 return false 163 } 164 return metrics.MemoryEvents.OomKill > 0 165} 166 167func resolveCgroupParent(parent string) (string, string, error) { 168 mountpoint, err := cgroup2Mountpoint() 169 if err != nil { 170 return "", "", err 171 } 172 173 if parent == "" || parent == cgroupParentSelf { 174 group, err := selfCgroupV2Path() 175 if err != nil { 176 return "", "", err 177 } 178 return mountpoint, group, nil 179 } 180 if !filepath.IsAbs(parent) { 181 return "", "", fmt.Errorf("cgroup parent must be %q or an absolute delegated cgroupfs path: %q", cgroupParentSelf, parent) 182 } 183 184 cleanParent := filepath.Clean(parent) 185 rel, err := filepath.Rel(mountpoint, cleanParent) 186 if err != nil { 187 return "", "", fmt.Errorf("resolve cgroup parent %q relative to cgroup2 mount %q: %w", cleanParent, mountpoint, err) 188 } 189 if rel == ".." || strings.HasPrefix(rel, "../") { 190 return "", "", fmt.Errorf("cgroup parent %q is outside cgroup2 mount %q", cleanParent, mountpoint) 191 } 192 if rel == "." { 193 return mountpoint, "/", nil 194 } 195 196 group := "/" + filepath.ToSlash(rel) 197 if err := cgroup2.VerifyGroupPath(group); err != nil { 198 return "", "", fmt.Errorf("invalid cgroup parent path %q: %w", group, err) 199 } 200 return mountpoint, group, nil 201} 202 203func cgroup2Mountpoint() (string, error) { 204 mounts, err := procfs.GetMounts() 205 if err != nil { 206 return "", fmt.Errorf("read procfs mountinfo: %w", err) 207 } 208 for _, mount := range mounts { 209 if mount.FSType == "cgroup2" { 210 return mount.MountPoint, nil 211 } 212 } 213 return "", fmt.Errorf("cgroup v2 mountpoint not found") 214} 215 216func selfCgroupV2Path() (string, error) { 217 self, err := procfs.Self() 218 if err != nil { 219 return "", fmt.Errorf("open procfs self: %w", err) 220 } 221 groups, err := self.Cgroups() 222 if err != nil { 223 return "", fmt.Errorf("read procfs self cgroups: %w", err) 224 } 225 for _, group := range groups { 226 if group.HierarchyID != 0 { 227 continue 228 } 229 path := group.Path 230 if path == "" { 231 path = "/" 232 } 233 if err := cgroup2.VerifyGroupPath(path); err != nil { 234 return "", fmt.Errorf("invalid self cgroup path %q: %w", path, err) 235 } 236 return path, nil 237 } 238 return "", fmt.Errorf("current process has no cgroup v2 hierarchy entry") 239} 240 241func moveParentProcesses(parent *cgroup2.Manager, supervisorMemoryMinMiB int64, logger *slog.Logger) error { 242 procs, err := parent.Procs(false) 243 if err != nil { 244 return fmt.Errorf("list parent cgroup processes: %w", err) 245 } 246 247 // first create with empty resources 248 supervisor, err := parent.NewChild(supervisorCgroupName, &cgroup2.Resources{}) 249 if err != nil { 250 return fmt.Errorf("create supervisor cgroup: %w", err) 251 } 252 253 // move procs 254 for _, pid := range procs { 255 if err := supervisor.AddProc(pid); err != nil { 256 return fmt.Errorf("move pid %d to supervisor cgroup: %w", pid, err) 257 } 258 } 259 260 // now apply resources. we can't do this while parent has procs still 261 if res := supervisorResources(supervisorMemoryMinMiB); res != nil { 262 // we use a "new" parent here, this is so we enable subtree_control. 263 // .Update() does not work here... 264 if _, err = parent.NewChild(supervisorCgroupName, res); err != nil { 265 return fmt.Errorf("apply supervisor cgroup resources: %w", err) 266 } 267 } 268 269 if logger != nil && len(procs) > 0 { 270 logger.Info("moved spindle processes to supervisor cgroup", "processes", len(procs)) 271 } 272 return nil 273} 274 275func sanitizeCgroupName(name string) string { 276 name = cgroupInvalidChar.ReplaceAllLiteralString(name, "-") 277 name = cgroupConsecutiveSep.ReplaceAllLiteralString(name, "-") 278 return strings.Trim(name, "-_.") 279}