Monorepo for Tangled
tangled.org
1// Copyright 2021 The Gitea Authors. All rights reserved.
2// SPDX-License-Identifier: MIT
3
4package gitea
5
6import (
7 "bufio"
8 "bytes"
9 "context"
10 "errors"
11 "fmt"
12 "io"
13 "os/exec"
14 "path"
15 "strings"
16
17 "github.com/djherbis/buffer"
18 "github.com/djherbis/nio/v3"
19 "tangled.org/core/sets"
20)
21
22// LogNameStatusRepo opens git log --raw in the provided repo and returns a stdin pipe, a stdout reader and cancel function
23func LogNameStatusRepo(ctx context.Context, repository, headRef, treepath string, paths ...string) (*bufio.Reader, func()) {
24 // We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
25 // so let's create a batch stdin and stdout
26 stdoutReader, stdoutWriter := nio.Pipe(buffer.New(32 * 1024))
27
28 // Lets also create a context so that we can absolutely ensure that the command should die when we're done
29 ctx, ctxCancel := context.WithCancel(ctx)
30
31 cancel := func() {
32 ctxCancel()
33 _ = stdoutReader.Close()
34 _ = stdoutWriter.Close()
35 }
36
37 cmd := exec.CommandContext(ctx,
38 "git",
39 "log",
40 "--name-status",
41 "-c",
42 "--format=commit%x00%H %P%x00",
43 "--parents",
44 "--no-renames",
45 "-t",
46 "-z",
47 headRef,
48 )
49
50 var files []string
51 if len(paths) < 70 {
52 if treepath != "" {
53 files = append(files, treepath)
54 for _, pth := range paths {
55 if pth != "" {
56 files = append(files, path.Join(treepath, pth))
57 }
58 }
59 } else {
60 for _, pth := range paths {
61 if pth != "" {
62 files = append(files, pth)
63 }
64 }
65 }
66 } else if treepath != "" {
67 files = append(files, treepath)
68 }
69 // Use the :(literal) pathspec magic to handle edge cases with files named like ":file.txt" or "*.jpg"
70 for i, file := range files {
71 files[i] = ":(literal)" + file
72 }
73 cmd.Args = append(cmd.Args, files...)
74
75 go func() {
76 stderr := &strings.Builder{}
77 cmd.Dir = repository
78 cmd.Stdout = stdoutWriter
79 cmd.Stderr = stderr
80 if err := cmd.Run(); err != nil {
81 _ = stdoutWriter.CloseWithError(fmt.Errorf("%w\n%s", err, stderr.String()))
82 return
83 }
84
85 _ = stdoutWriter.Close()
86 }()
87
88 // For simplicities sake we'll us a buffered reader to read from the cat-file --batch
89 bufReader := bufio.NewReaderSize(stdoutReader, 32*1024)
90
91 return bufReader, cancel
92}
93
94// LogNameStatusRepoParser parses a git log raw output from LogRawRepo
95type LogNameStatusRepoParser struct {
96 treepath string
97 paths []string
98 next []byte
99 buffull bool
100 rd *bufio.Reader
101 cancel func()
102}
103
104// NewLogNameStatusRepoParser returns a new parser for a git log raw output
105func NewLogNameStatusRepoParser(ctx context.Context, repository, head, treepath string, paths ...string) *LogNameStatusRepoParser {
106 rd, cancel := LogNameStatusRepo(ctx, repository, head, treepath, paths...)
107 return &LogNameStatusRepoParser{
108 treepath: treepath,
109 paths: paths,
110 rd: rd,
111 cancel: cancel,
112 }
113}
114
115// LogNameStatusCommitData represents a commit artefact from git log raw
116type LogNameStatusCommitData struct {
117 CommitID string
118 ParentIDs []string
119 Paths []bool
120}
121
122// Next returns the next LogStatusCommitData
123func (g *LogNameStatusRepoParser) Next(treepath string, paths2ids map[string]int, changed []bool, maxpathlen int) (*LogNameStatusCommitData, error) {
124 var err error
125 if len(g.next) == 0 {
126 g.buffull = false
127 g.next, err = g.rd.ReadSlice('\x00')
128 if err != nil {
129 switch err {
130 case bufio.ErrBufferFull:
131 g.buffull = true
132 case io.EOF:
133 return nil, nil
134 default:
135 return nil, err
136 }
137 }
138 }
139
140 ret := LogNameStatusCommitData{}
141 if bytes.Equal(g.next, []byte("commit\000")) {
142 g.next, err = g.rd.ReadSlice('\x00')
143 if err != nil {
144 switch err {
145 case bufio.ErrBufferFull:
146 g.buffull = true
147 case io.EOF:
148 return nil, nil
149 default:
150 return nil, err
151 }
152 }
153 }
154
155 // Our "line" must look like: <commitid> SP (<parent> SP) * NUL
156 commitIDs := string(g.next)
157 if g.buffull {
158 more, err := g.rd.ReadString('\x00')
159 if err != nil {
160 return nil, err
161 }
162 commitIDs += more
163 }
164 commitIDs = commitIDs[:len(commitIDs)-1]
165 splitIDs := strings.Split(commitIDs, " ")
166 ret.CommitID = splitIDs[0]
167 if len(splitIDs) > 1 {
168 ret.ParentIDs = splitIDs[1:]
169 }
170
171 // now read the next "line"
172 g.buffull = false
173 g.next, err = g.rd.ReadSlice('\x00')
174 if err != nil {
175 if err == bufio.ErrBufferFull {
176 g.buffull = true
177 } else if err != io.EOF {
178 return nil, err
179 }
180 }
181
182 if err == io.EOF || (g.next[0] != '\n' && g.next[0] != '\000') {
183 return &ret, nil
184 }
185
186 // Ok we have some changes.
187 // This line will look like: NL <fname> NUL
188 //
189 // Subsequent lines will not have the NL - so drop it here - g.bufffull must also be false at this point too.
190 if g.next[0] == '\n' {
191 g.next = g.next[1:]
192 } else {
193 g.buffull = false
194 g.next, err = g.rd.ReadSlice('\x00')
195 if err != nil {
196 if err == bufio.ErrBufferFull {
197 g.buffull = true
198 } else if err != io.EOF {
199 return nil, err
200 }
201 }
202 if len(g.next) == 0 {
203 return &ret, nil
204 }
205 if g.next[0] == '\x00' {
206 g.buffull = false
207 g.next, err = g.rd.ReadSlice('\x00')
208 if err != nil {
209 if err == bufio.ErrBufferFull {
210 g.buffull = true
211 } else if err != io.EOF {
212 return nil, err
213 }
214 }
215 }
216 }
217
218 fnameBuf := make([]byte, 4096)
219
220diffloop:
221 for {
222 if err == io.EOF || bytes.Equal(g.next, []byte("commit\000")) {
223 return &ret, nil
224 }
225 g.next, err = g.rd.ReadSlice('\x00')
226 if err != nil {
227 switch err {
228 case bufio.ErrBufferFull:
229 g.buffull = true
230 case io.EOF:
231 return &ret, nil
232 default:
233 return nil, err
234 }
235 }
236 copy(fnameBuf, g.next)
237 if len(fnameBuf) < len(g.next) {
238 fnameBuf = append(fnameBuf, g.next[len(fnameBuf):]...)
239 } else {
240 fnameBuf = fnameBuf[:len(g.next)]
241 }
242 if err != nil {
243 if err != bufio.ErrBufferFull {
244 return nil, err
245 }
246 more, err := g.rd.ReadBytes('\x00')
247 if err != nil {
248 return nil, err
249 }
250 fnameBuf = append(fnameBuf, more...)
251 }
252
253 // read the next line
254 g.buffull = false
255 g.next, err = g.rd.ReadSlice('\x00')
256 if err != nil {
257 if err == bufio.ErrBufferFull {
258 g.buffull = true
259 } else if err != io.EOF {
260 return nil, err
261 }
262 }
263
264 if treepath != "" {
265 if !bytes.HasPrefix(fnameBuf, []byte(treepath)) {
266 fnameBuf = fnameBuf[:cap(fnameBuf)]
267 continue diffloop
268 }
269 }
270 fnameBuf = fnameBuf[len(treepath) : len(fnameBuf)-1]
271 if len(fnameBuf) > maxpathlen {
272 fnameBuf = fnameBuf[:cap(fnameBuf)]
273 continue diffloop
274 }
275 if len(fnameBuf) > 0 {
276 if len(treepath) > 0 {
277 if fnameBuf[0] != '/' || bytes.IndexByte(fnameBuf[1:], '/') >= 0 {
278 fnameBuf = fnameBuf[:cap(fnameBuf)]
279 continue diffloop
280 }
281 fnameBuf = fnameBuf[1:]
282 } else if bytes.IndexByte(fnameBuf, '/') >= 0 {
283 fnameBuf = fnameBuf[:cap(fnameBuf)]
284 continue diffloop
285 }
286 }
287
288 idx, ok := paths2ids[string(fnameBuf)]
289 if !ok {
290 fnameBuf = fnameBuf[:cap(fnameBuf)]
291 continue diffloop
292 }
293 if ret.Paths == nil {
294 ret.Paths = changed
295 }
296 changed[idx] = true
297 }
298}
299
300// Close closes the parser
301func (g *LogNameStatusRepoParser) Close() {
302 g.cancel()
303}
304
305// WalkGitLog walks the git log --name-status for the head commit in the provided treepath and files
306func WalkGitLog(ctx context.Context, repoPath string, head, treepath string, paths ...string) (map[string]string, error) {
307 path2idx := map[string]int{}
308 maxpathlen := len(treepath)
309
310 for i := range paths {
311 path2idx[paths[i]] = i
312 pthlen := len(paths[i]) + len(treepath) + 1
313 if pthlen > maxpathlen {
314 maxpathlen = pthlen
315 }
316 }
317
318 g := NewLogNameStatusRepoParser(ctx, repoPath, head, treepath, paths...)
319 // don't use defer g.Close() here as g may change its value - instead wrap in a func
320 defer func() {
321 g.Close()
322 }()
323
324 results := make([]string, len(paths))
325 remaining := len(paths)
326 nextRestart := min((len(paths)*3)/4, 70)
327 lastEmptyParent := head
328 commitSinceLastEmptyParent := uint64(0)
329 commitSinceNextRestart := uint64(0)
330 parentRemaining := sets.New[string]()
331
332 changed := make([]bool, len(paths))
333
334heaploop:
335 for {
336 select {
337 case <-ctx.Done():
338 if ctx.Err() == context.DeadlineExceeded {
339 break heaploop
340 }
341 g.Close()
342 return nil, ctx.Err()
343 default:
344 }
345 current, err := g.Next(treepath, path2idx, changed, maxpathlen)
346 if err != nil {
347 if errors.Is(err, context.DeadlineExceeded) {
348 break heaploop
349 }
350 g.Close()
351 return nil, err
352 }
353 if current == nil {
354 break heaploop
355 }
356 parentRemaining.Remove(current.CommitID)
357 for i, found := range current.Paths {
358 if !found {
359 continue
360 }
361 changed[i] = false
362 if results[i] == "" {
363 results[i] = current.CommitID
364 delete(path2idx, paths[i])
365 remaining--
366 if results[0] == "" {
367 results[0] = current.CommitID
368 delete(path2idx, "")
369 remaining--
370 }
371 }
372 }
373
374 if remaining <= 0 {
375 break heaploop
376 }
377 commitSinceLastEmptyParent++
378 if parentRemaining.Len() == 0 {
379 lastEmptyParent = current.CommitID
380 commitSinceLastEmptyParent = 0
381 }
382 if remaining <= nextRestart {
383 commitSinceNextRestart++
384 if 4*commitSinceNextRestart > 3*commitSinceLastEmptyParent {
385 g.Close()
386 remainingPaths := make([]string, 0, len(paths))
387 for i, pth := range paths {
388 if results[i] == "" {
389 remainingPaths = append(remainingPaths, pth)
390 }
391 }
392 g = NewLogNameStatusRepoParser(ctx, repoPath, lastEmptyParent, treepath, remainingPaths...)
393 parentRemaining = sets.New[string]()
394 nextRestart = (remaining * 3) / 4
395 continue heaploop
396 }
397 }
398 for _, id := range current.ParentIDs {
399 parentRemaining.Insert(id)
400 }
401 }
402 g.Close()
403
404 resultsMap := map[string]string{}
405 for i, pth := range paths {
406 resultsMap[pth] = results[i]
407 }
408
409 return resultsMap, nil
410}