Monorepo for Tangled
tangled.org
// NOTE: lots of code copied from Gitea, with slight modifications to use go-git objects
2
3package gitea
4
5import (
6 "bufio"
7 "bytes"
8 "context"
9 "fmt"
10 "io"
11 "math"
12 "os/exec"
13 "strconv"
14 "strings"
15
16 "github.com/djherbis/buffer"
17 "github.com/djherbis/nio/v3"
18 "github.com/go-git/go-git/v5/plumbing"
19 "github.com/go-git/go-git/v5/plumbing/filemode"
20 "github.com/go-git/go-git/v5/plumbing/hash"
21 "github.com/go-git/go-git/v5/plumbing/object"
22)
23
24func GetCommit(ctx context.Context, repoPath, rev string) (*object.Commit, error) {
25 wr, rd, cancel := CatFileBatch(ctx, repoPath)
26 defer cancel()
27
28 if _, err := wr.Write([]byte(rev + "\n")); err != nil {
29 return nil, fmt.Errorf("write rev: %w", err)
30 }
31 sha, typ, size, err := ReadBatchLine(rd)
32 if err != nil {
33 return nil, err
34 }
35 if typ != "commit" {
36 if err := DiscardFull(rd, size+1); err != nil {
37 return nil, err
38 }
39 return nil, fmt.Errorf("unexpected type: %s for commit: %s", typ, rev)
40 }
41 commit, err := ReadCommit(plumbing.NewHash(string(sha)), io.LimitReader(rd, size))
42 if err != nil {
43 return nil, fmt.Errorf("read commit %s: %w", rev, err)
44 }
45 if _, err := rd.Discard(1); err != nil {
46 return nil, err
47 }
48 return commit, nil
49}
50
51func GetTree(ctx context.Context, repoPath, rev string) (*object.Tree, error) {
52 bw, br, cancel := CatFileBatch(ctx, repoPath)
53 defer cancel()
54
55 return BatchGetTree(bw, br, rev)
56}
57
58func BatchGetTree(bw io.WriteCloser, br *bufio.Reader, rev string) (*object.Tree, error) {
59 if _, err := bw.Write([]byte(rev + "\n")); err != nil {
60 return nil, fmt.Errorf("write rev: %w", err)
61 }
62 sha, typ, size, err := ReadBatchLine(br)
63 if err != nil {
64 return nil, fmt.Errorf("resolve %s: %w", rev, err)
65 }
66 if typ != "tree" {
67 if err := DiscardFull(br, size+1); err != nil {
68 return nil, err
69 }
70 return nil, fmt.Errorf("unexpected type: %s for tree: %s", typ, rev)
71 }
72
73 entries, err := catBatchParseTreeEntries(br, size)
74 if err != nil {
75 return nil, fmt.Errorf("read tree %s: %w", rev, err)
76 }
77 return &object.Tree{
78 Hash: plumbing.NewHash(string(sha)),
79 Entries: entries,
80 }, nil
81}
82
83func catBatchParseTreeEntries(rd *bufio.Reader, sz int64) ([]object.TreeEntry, error) {
84 entries := make([]object.TreeEntry, 0, 10)
85loop:
86 for sz > 0 {
87 mode, fname, sha, count, err := ParseCatFileTreeLine(rd)
88 if err != nil {
89 if err == io.EOF {
90 break loop
91 }
92 return nil, err
93 }
94 modeNum, err := strconv.ParseUint(string(mode), 8, 32)
95 if err != nil {
96 return nil, err
97 }
98 sz -= int64(count)
99 entry := object.TreeEntry{
100 Name: string(fname),
101 Mode: filemode.FileMode(modeNum),
102 Hash: plumbing.Hash(sha),
103 }
104 entries = append(entries, entry)
105 }
106 if _, err := rd.Discard(1); err != nil {
107 return entries, err
108 }
109 return entries, nil
110}
111
112func CatFileBatchCheck(ctx context.Context, repoPath string) (io.WriteCloser, *bufio.Reader, func()) {
113 batchStdinReader, batchStdinWriter := io.Pipe()
114 batchStdoutReader, batchStdoutWriter := nio.Pipe(buffer.New(32 * 1024))
115 ctx, ctxCancel := context.WithCancel(ctx)
116 closed := make(chan struct{})
117 cancel := func() {
118 ctxCancel()
119 _ = batchStdinWriter.Close()
120 _ = batchStdoutReader.Close()
121 <-closed
122 }
123
124 // Ensure cancel is called as soon as the provided context is cancelled
125 go func() {
126 <-ctx.Done()
127 cancel()
128 }()
129
130 go func() {
131 stderr := &strings.Builder{}
132 cmd := exec.CommandContext(ctx, "git", "-C", repoPath, "cat-file", "--batch-check")
133 cmd.Stdin = batchStdinReader
134 cmd.Stdout = batchStdoutWriter
135 cmd.Stderr = stderr
136 if err := cmd.Run(); err != nil {
137 _ = batchStdinReader.CloseWithError(fmt.Errorf("%w\n%s", err, stderr.String()))
138 _ = batchStdoutWriter.CloseWithError(fmt.Errorf("%w\n%s", err, stderr.String()))
139 } else {
140 _ = batchStdoutWriter.Close()
141 _ = batchStdinReader.Close()
142 }
143 close(closed)
144 }()
145
146 batchReader := bufio.NewReaderSize(batchStdoutReader, 32*1024)
147 return batchStdinWriter, batchReader, cancel
148}
149
150func CatFileBatch(ctx context.Context, repoPath string) (io.WriteCloser, *bufio.Reader, func()) {
151 batchStdinReader, batchStdinWriter := io.Pipe()
152 batchStdoutReader, batchStdoutWriter := nio.Pipe(buffer.New(32 * 1024))
153 ctx, ctxCancel := context.WithCancel(ctx)
154 closed := make(chan struct{})
155 cancel := func() {
156 ctxCancel()
157 _ = batchStdinWriter.Close()
158 _ = batchStdoutReader.Close()
159 <-closed
160 }
161
162 // Ensure cancel is called as soon as the provided context is cancelled
163 go func() {
164 <-ctx.Done()
165 cancel()
166 }()
167
168 go func() {
169 stderr := &strings.Builder{}
170 cmd := exec.CommandContext(ctx, "git", "-C", repoPath, "cat-file", "--batch")
171 cmd.Stdin = batchStdinReader
172 cmd.Stdout = batchStdoutWriter
173 cmd.Stderr = stderr
174 if err := cmd.Run(); err != nil {
175 _ = batchStdinReader.CloseWithError(fmt.Errorf("%w\n%s", err, stderr.String()))
176 _ = batchStdoutWriter.CloseWithError(fmt.Errorf("%w\n%s", err, stderr.String()))
177 } else {
178 _ = batchStdoutWriter.Close()
179 _ = batchStdinReader.Close()
180 }
181 close(closed)
182 }()
183
184 batchReader := bufio.NewReaderSize(batchStdoutReader, 32*1024)
185 return batchStdinWriter, batchReader, cancel
186}
187
// ReadBatchLine reads one object header of the form
//
//	<sha> SP <type> SP <size> LF
//
// from reader and returns its three fields. A line consisting of a lone
// newline in front of the header is skipped.
//
// If reader is not already a *bufio.Reader it is wrapped in a new one; any
// data that wrapper buffers beyond the header is not available to the caller
// afterwards, so callers that keep reading should pass a *bufio.Reader.
//
// Errors:
//   - read errors (including io.EOF) are returned as-is;
//   - a line without any space yields a "missing sha" error quoting the line;
//   - a line with only one space (for example "<rev> missing") yields a
//     "missing size" error, with sha set to the first field;
//   - a size that is not a base-10 integer yields the strconv error.
func ReadBatchLine(reader io.Reader) (sha []byte, typ string, size int64, err error) {
	rd, ok := reader.(*bufio.Reader)
	if !ok {
		rd = bufio.NewReader(reader)
	}

	typ, err = rd.ReadString('\n')
	if err != nil {
		return sha, typ, size, err
	}
	if len(typ) == 1 {
		// Only the delimiter was read: skip this empty line and read the next.
		typ, err = rd.ReadString('\n')
		if err != nil {
			return sha, typ, size, err
		}
	}

	idx := strings.IndexByte(typ, ' ')
	if idx < 0 {
		// sha has not been parsed yet, so report the offending line instead.
		return sha, typ, size, fmt.Errorf("missing sha: %q", typ)
	}
	sha = []byte(typ[:idx])
	typ = typ[idx+1:]

	idx = strings.IndexByte(typ, ' ')
	if idx < 0 {
		return sha, typ, size, fmt.Errorf("missing size: %s", sha)
	}

	// typ still ends with the '\n' delimiter; exclude it from the size field.
	sizeStr := typ[idx+1 : len(typ)-1]
	typ = typ[:idx]

	size, err = strconv.ParseInt(sizeStr, 10, 64)
	return sha, typ, size, err
}
221
222// NOTE: readCommit doesn't return complete go-git [object.Commit] object!
223// The embedded object store is missing, so calling method from returned commit
224// can lead to panic.
225func ReadCommit(oid plumbing.Hash, reader io.Reader) (*object.Commit, error) {
226 commit := &object.Commit{
227 Hash: oid,
228 ExtraHeaders: make(map[string][]byte),
229 }
230
231 payloadSB := new(strings.Builder)
232 signatureSB := new(strings.Builder)
233 messageSB := new(strings.Builder)
234 firstLine := true
235 message := false
236 pgpsig := false
237
238 bufReader, ok := reader.(*bufio.Reader)
239 if !ok {
240 bufReader = bufio.NewReader(reader)
241 }
242
243readLoop:
244 for {
245 line, err := bufReader.ReadBytes('\n')
246 if err != nil {
247 if err == io.EOF {
248 if message {
249 _, _ = messageSB.Write(line)
250 }
251 _, _ = payloadSB.Write(line)
252 break readLoop
253 }
254 return nil, err
255 }
256 if pgpsig {
257 if len(line) > 0 && line[0] == ' ' {
258 _, _ = signatureSB.Write(line[1:])
259 continue
260 }
261 pgpsig = false
262 }
263
264 if !message {
265 // This is probably not correct but is copied from go-gits interpretation...
266 trimmed := bytes.TrimSpace(line)
267 if len(trimmed) == 0 {
268 message = true
269 _, _ = payloadSB.Write(line)
270 continue
271 }
272
273 k, data, _ := bytes.Cut(line, []byte{' '})
274
275 switch string(k) {
276 case "tree":
277 commit.TreeHash = plumbing.NewHash(string(data))
278 _, _ = payloadSB.Write(line)
279 case "parent":
280 commit.ParentHashes = append(commit.ParentHashes, plumbing.NewHash(string(data)))
281 _, _ = payloadSB.Write(line)
282 case "author":
283 commit.Author.Decode(data)
284 _, _ = payloadSB.Write(line)
285 case "committer":
286 commit.Committer.Decode(data)
287 _, _ = payloadSB.Write(line)
288 case "gpgsig":
289 fallthrough
290 case "gpgsig-sha256": // FIXME: no intertop, so only 1 exists at present.
291 _, _ = signatureSB.Write(data)
292 _ = signatureSB.WriteByte('\n')
293 pgpsig = true
294 default:
295 commit.ExtraHeaders[string(k)] = bytes.TrimSpace(data)
296 // If the first line is not any of the known headers, then it is probably the prefix added when git cat-file is called with --batch, and that is not part of the payload
297 if !firstLine {
298 // Every subsequent header field is added to the payload
299 _, _ = payloadSB.Write(line)
300 }
301 }
302 } else {
303 _, _ = messageSB.Write(line)
304 _, _ = payloadSB.Write(line)
305 }
306
307 firstLine = false
308 }
309 commit.Message = messageSB.String()
310 // TODO: pass raw payload so we can verify it without reconstructing the payload
311 commit.PGPSignature = signatureSB.String()
312
313 return commit, nil
314}
315
316// ParseCatFileTreeLine reads an entry from a tree in a cat-file --batch stream
317// This carefully avoids allocations - except where fnameBuf is too small.
318// It is recommended therefore to pass in an fnameBuf large enough to avoid almost all allocations
319//
320// Each line is composed of:
321// <mode-in-ascii-dropping-initial-zeros> SP <fname> NUL <binary HASH>
322//
323// We don't attempt to convert the raw HASH to save a lot of time
324func ParseCatFileTreeLine(rd *bufio.Reader) (mode, fname, sha []byte, n int, err error) {
325 modeBuf := make([]byte, 40)
326 fnameBuf := make([]byte, 4096)
327 shaBuf := make([]byte, hash.HexSize)
328
329 var readBytes []byte
330
331 // Read the Mode & fname
332 readBytes, err = rd.ReadSlice('\x00')
333 if err != nil {
334 return mode, fname, sha, n, err
335 }
336 idx := bytes.IndexByte(readBytes, ' ')
337 if idx < 0 {
338 return mode, fname, sha, n, fmt.Errorf("missing")
339 }
340
341 n += idx + 1
342 copy(modeBuf, readBytes[:idx])
343 if len(modeBuf) >= idx {
344 modeBuf = modeBuf[:idx]
345 } else {
346 modeBuf = append(modeBuf, readBytes[len(modeBuf):idx]...)
347 }
348 mode = modeBuf
349
350 readBytes = readBytes[idx+1:]
351
352 // Deal with the fname
353 copy(fnameBuf, readBytes)
354 if len(fnameBuf) > len(readBytes) {
355 fnameBuf = fnameBuf[:len(readBytes)]
356 } else {
357 fnameBuf = append(fnameBuf, readBytes[len(fnameBuf):]...)
358 }
359 for err == bufio.ErrBufferFull {
360 readBytes, err = rd.ReadSlice('\x00')
361 fnameBuf = append(fnameBuf, readBytes...)
362 }
363 n += len(fnameBuf)
364 if err != nil {
365 return mode, fname, sha, n, err
366 }
367 fnameBuf = fnameBuf[:len(fnameBuf)-1]
368 fname = fnameBuf
369
370 // Deal with the binary hash
371 idx = 0
372 length := hash.HexSize / 2
373 for idx < length {
374 var read int
375 read, err = rd.Read(shaBuf[idx:length])
376 n += read
377 if err != nil {
378 return mode, fname, sha, n, err
379 }
380 idx += read
381 }
382 sha = shaBuf
383 return mode, fname, sha, n, err
384}
385
// DiscardFull skips discard bytes from rd, returning the first error
// encountered (io.EOF if the stream ends early). A zero or negative discard is
// a no-op.
//
// bufio.Reader.Discard takes an int, so the work is split into chunks of at
// most math.MaxInt32 bytes; clamping on every iteration keeps the int
// conversion from overflowing on platforms where int is 32 bits wide.
func DiscardFull(rd *bufio.Reader, discard int64) error {
	for discard > 0 {
		chunk := discard
		if chunk > math.MaxInt32 {
			chunk = math.MaxInt32
		}
		n, err := rd.Discard(int(chunk))
		discard -= int64(n)
		if err != nil {
			return err
		}
	}
	return nil
}