Monorepo for Tangled
tangled.org
// NOTE: lots of code copied from Gitea, with slight modifications to use go-git objects
2
3package gitea
4
5import (
6 "bufio"
7 "bytes"
8 "context"
9 "fmt"
10 "io"
11 "math"
12 "os/exec"
13 "strconv"
14 "strings"
15
16 "github.com/djherbis/buffer"
17 "github.com/djherbis/nio/v3"
18 "github.com/go-git/go-git/v5/plumbing"
19 "github.com/go-git/go-git/v5/plumbing/filemode"
20 "github.com/go-git/go-git/v5/plumbing/hash"
21 "github.com/go-git/go-git/v5/plumbing/object"
22)
23
24func GetCommit(ctx context.Context, repoPath, rev string) (*object.Commit, error) {
25 wr, rd, cancel := CatFileBatch(ctx, repoPath)
26 defer cancel()
27
28 if _, err := wr.Write([]byte(rev + "\n")); err != nil {
29 return nil, fmt.Errorf("write rev: %w", err)
30 }
31 sha, typ, size, err := ReadBatchLine(rd)
32 if err != nil {
33 return nil, err
34 }
35 if typ != "commit" {
36 if err := DiscardFull(rd, size+1); err != nil {
37 return nil, err
38 }
39 return nil, fmt.Errorf("unexpected type: %s for commit: %s", typ, rev)
40 }
41 commit, err := ReadCommit(plumbing.NewHash(string(sha)), io.LimitReader(rd, size))
42 if err != nil {
43 return nil, fmt.Errorf("read commit %s: %w", rev, err)
44 }
45 if _, err := rd.Discard(1); err != nil {
46 return nil, err
47 }
48 return commit, nil
49}
50
51func GetTree(ctx context.Context, repoPath, rev string) (*object.Tree, error) {
52 bw, br, cancel := CatFileBatch(ctx, repoPath)
53 defer cancel()
54
55 return BatchGetTree(bw, br, rev)
56}
57
58func BatchGetTree(bw io.WriteCloser, br *bufio.Reader, rev string) (*object.Tree, error) {
59 if _, err := bw.Write([]byte(rev + "\n")); err != nil {
60 return nil, fmt.Errorf("write rev: %w", err)
61 }
62 sha, typ, size, err := ReadBatchLine(br)
63 if err != nil {
64 return nil, fmt.Errorf("resolve %s: %w", rev, err)
65 }
66 if typ != "tree" {
67 if err := DiscardFull(br, size+1); err != nil {
68 return nil, err
69 }
70 return nil, fmt.Errorf("unexpected type: %s for tree: %s", typ, rev)
71 }
72
73 entries, err := catBatchParseTreeEntries(br, size)
74 if err != nil {
75 return nil, fmt.Errorf("read tree %s: %w", rev, err)
76 }
77 return &object.Tree{
78 Hash: plumbing.NewHash(string(sha)),
79 Entries: entries,
80 }, nil
81}
82
83func catBatchParseTreeEntries(rd *bufio.Reader, sz int64) ([]object.TreeEntry, error) {
84 entries := make([]object.TreeEntry, 0, 10)
85loop:
86 for sz > 0 {
87 mode, fname, sha, count, err := ParseCatFileTreeLine(rd)
88 if err != nil {
89 if err == io.EOF {
90 break loop
91 }
92 return nil, err
93 }
94 modeNum, err := strconv.ParseUint(string(mode), 8, 32)
95 if err != nil {
96 return nil, err
97 }
98 sz -= int64(count)
99 entry := object.TreeEntry{
100 Name: string(fname),
101 Mode: filemode.FileMode(modeNum),
102 Hash: plumbing.Hash(sha),
103 }
104 entries = append(entries, entry)
105 }
106 if _, err := rd.Discard(1); err != nil {
107 return entries, err
108 }
109 return entries, nil
110}
111
112func CatFileBatch(ctx context.Context, repoPath string) (io.WriteCloser, *bufio.Reader, func()) {
113 batchStdinReader, batchStdinWriter := io.Pipe()
114 batchStdoutReader, batchStdoutWriter := nio.Pipe(buffer.New(32 * 1024))
115 ctx, ctxCancel := context.WithCancel(ctx)
116 closed := make(chan struct{})
117 cancel := func() {
118 ctxCancel()
119 _ = batchStdinWriter.Close()
120 _ = batchStdoutReader.Close()
121 <-closed
122 }
123
124 // Ensure cancel is called as soon as the provided context is cancelled
125 go func() {
126 <-ctx.Done()
127 cancel()
128 }()
129
130 go func() {
131 stderr := &strings.Builder{}
132 cmd := exec.CommandContext(ctx, "git", "-C", repoPath, "cat-file", "--batch")
133 cmd.Stdin = batchStdinReader
134 cmd.Stdout = batchStdoutWriter
135 cmd.Stderr = stderr
136 if err := cmd.Run(); err != nil {
137 _ = batchStdinReader.CloseWithError(fmt.Errorf("%w\n%s", err, stderr.String()))
138 _ = batchStdoutWriter.CloseWithError(fmt.Errorf("%w\n%s", err, stderr.String()))
139 } else {
140 _ = batchStdoutWriter.Close()
141 _ = batchStdinReader.Close()
142 }
143 close(closed)
144 }()
145
146 batchReader := bufio.NewReaderSize(batchStdoutReader, 32*1024)
147 return batchStdinWriter, batchReader, cancel
148}
149
// ReadBatchLine reads one object header line of the form
//
//	<sha> SP <type> SP <size> LF
//
// from reader and returns its three fields. A single empty line in front of
// the header is skipped.
//
// If reader is not a *bufio.Reader it is wrapped in a new one; bytes that the
// temporary wrapper buffers beyond the header line are then lost to the
// caller, so pass a *bufio.Reader when more data is to be read afterwards.
//
// An error is returned when reading fails, when the line does not contain the
// two separating spaces, or when the size is not a valid base-10 integer.
func ReadBatchLine(reader io.Reader) (sha []byte, typ string, size int64, err error) {
	rd, ok := reader.(*bufio.Reader)
	if !ok {
		rd = bufio.NewReader(reader)
	}

	typ, err = rd.ReadString('\n')
	if err != nil {
		return sha, typ, size, err
	}
	if len(typ) == 1 {
		// Only the delimiter was read: skip the empty line and read the
		// actual header.
		typ, err = rd.ReadString('\n')
		if err != nil {
			return sha, typ, size, err
		}
	}

	idx := strings.IndexByte(typ, ' ')
	if idx < 0 {
		// sha has not been extracted yet, so report the offending line
		// itself rather than an empty value.
		return sha, typ, size, fmt.Errorf("missing sha: %s", strings.TrimSuffix(typ, "\n"))
	}
	sha = []byte(typ[:idx])
	typ = typ[idx+1:]

	idx = strings.IndexByte(typ, ' ')
	if idx < 0 {
		return sha, typ, size, fmt.Errorf("missing size: %s", sha)
	}

	// typ still ends with the '\n' delimiter; exclude it from the size field.
	sizeStr := typ[idx+1 : len(typ)-1]
	typ = typ[:idx]

	size, err = strconv.ParseInt(sizeStr, 10, 64)
	return sha, typ, size, err
}
183
184// NOTE: readCommit doesn't return complete go-git [object.Commit] object!
185// The embedded object store is missing, so calling method from returned commit
186// can lead to panic.
187func ReadCommit(oid plumbing.Hash, reader io.Reader) (*object.Commit, error) {
188 commit := &object.Commit{
189 Hash: oid,
190 ExtraHeaders: make(map[string][]byte),
191 }
192
193 payloadSB := new(strings.Builder)
194 signatureSB := new(strings.Builder)
195 messageSB := new(strings.Builder)
196 firstLine := true
197 message := false
198 pgpsig := false
199
200 bufReader, ok := reader.(*bufio.Reader)
201 if !ok {
202 bufReader = bufio.NewReader(reader)
203 }
204
205readLoop:
206 for {
207 line, err := bufReader.ReadBytes('\n')
208 if err != nil {
209 if err == io.EOF {
210 if message {
211 _, _ = messageSB.Write(line)
212 }
213 _, _ = payloadSB.Write(line)
214 break readLoop
215 }
216 return nil, err
217 }
218 if pgpsig {
219 if len(line) > 0 && line[0] == ' ' {
220 _, _ = signatureSB.Write(line[1:])
221 continue
222 }
223 pgpsig = false
224 }
225
226 if !message {
227 // This is probably not correct but is copied from go-gits interpretation...
228 trimmed := bytes.TrimSpace(line)
229 if len(trimmed) == 0 {
230 message = true
231 _, _ = payloadSB.Write(line)
232 continue
233 }
234
235 k, data, _ := bytes.Cut(line, []byte{' '})
236
237 switch string(k) {
238 case "tree":
239 commit.TreeHash = plumbing.NewHash(string(data))
240 _, _ = payloadSB.Write(line)
241 case "parent":
242 commit.ParentHashes = append(commit.ParentHashes, plumbing.NewHash(string(data)))
243 _, _ = payloadSB.Write(line)
244 case "author":
245 commit.Author.Decode(data)
246 _, _ = payloadSB.Write(line)
247 case "committer":
248 commit.Committer.Decode(data)
249 _, _ = payloadSB.Write(line)
250 case "gpgsig":
251 fallthrough
252 case "gpgsig-sha256": // FIXME: no intertop, so only 1 exists at present.
253 _, _ = signatureSB.Write(data)
254 _ = signatureSB.WriteByte('\n')
255 pgpsig = true
256 default:
257 commit.ExtraHeaders[string(k)] = bytes.TrimSpace(data)
258 // If the first line is not any of the known headers, then it is probably the prefix added when git cat-file is called with --batch, and that is not part of the payload
259 if !firstLine {
260 // Every subsequent header field is added to the payload
261 _, _ = payloadSB.Write(line)
262 }
263 }
264 } else {
265 _, _ = messageSB.Write(line)
266 _, _ = payloadSB.Write(line)
267 }
268
269 firstLine = false
270 }
271 commit.Message = messageSB.String()
272 // TODO: pass raw payload so we can verify it without reconstructing the payload
273 commit.PGPSignature = signatureSB.String()
274
275 return commit, nil
276}
277
278// ParseCatFileTreeLine reads an entry from a tree in a cat-file --batch stream
279// This carefully avoids allocations - except where fnameBuf is too small.
280// It is recommended therefore to pass in an fnameBuf large enough to avoid almost all allocations
281//
282// Each line is composed of:
283// <mode-in-ascii-dropping-initial-zeros> SP <fname> NUL <binary HASH>
284//
285// We don't attempt to convert the raw HASH to save a lot of time
286func ParseCatFileTreeLine(rd *bufio.Reader) (mode, fname, sha []byte, n int, err error) {
287 modeBuf := make([]byte, 40)
288 fnameBuf := make([]byte, 4096)
289 shaBuf := make([]byte, hash.HexSize)
290
291 var readBytes []byte
292
293 // Read the Mode & fname
294 readBytes, err = rd.ReadSlice('\x00')
295 if err != nil {
296 return mode, fname, sha, n, err
297 }
298 idx := bytes.IndexByte(readBytes, ' ')
299 if idx < 0 {
300 return mode, fname, sha, n, fmt.Errorf("missing")
301 }
302
303 n += idx + 1
304 copy(modeBuf, readBytes[:idx])
305 if len(modeBuf) >= idx {
306 modeBuf = modeBuf[:idx]
307 } else {
308 modeBuf = append(modeBuf, readBytes[len(modeBuf):idx]...)
309 }
310 mode = modeBuf
311
312 readBytes = readBytes[idx+1:]
313
314 // Deal with the fname
315 copy(fnameBuf, readBytes)
316 if len(fnameBuf) > len(readBytes) {
317 fnameBuf = fnameBuf[:len(readBytes)]
318 } else {
319 fnameBuf = append(fnameBuf, readBytes[len(fnameBuf):]...)
320 }
321 for err == bufio.ErrBufferFull {
322 readBytes, err = rd.ReadSlice('\x00')
323 fnameBuf = append(fnameBuf, readBytes...)
324 }
325 n += len(fnameBuf)
326 if err != nil {
327 return mode, fname, sha, n, err
328 }
329 fnameBuf = fnameBuf[:len(fnameBuf)-1]
330 fname = fnameBuf
331
332 // Deal with the binary hash
333 idx = 0
334 length := hash.HexSize / 2
335 for idx < length {
336 var read int
337 read, err = rd.Read(shaBuf[idx:length])
338 n += read
339 if err != nil {
340 return mode, fname, sha, n, err
341 }
342 idx += read
343 }
344 sha = shaBuf
345 return mode, fname, sha, n, err
346}
347
// DiscardFull skips exactly discard bytes of rd.
//
// bufio.Reader.Discard takes an int, so the request is issued in chunks of at
// most math.MaxInt32 bytes; this keeps every int conversion in range on
// platforms where int is 32 bits wide. A discard value of zero or less is a
// no-op. The first error reported by rd is returned, which includes the case
// where rd ends before discard bytes were skipped.
func DiscardFull(rd *bufio.Reader, discard int64) error {
	for discard > 0 {
		chunk := discard
		if chunk > math.MaxInt32 {
			chunk = math.MaxInt32
		}
		n, err := rd.Discard(int(chunk))
		discard -= int64(n)
		if err != nil {
			return err
		}
	}
	return nil
}