Monorepo for Tangled
tangled.org
// NOTE: lots of code copied from Gitea with slight modifications to use go-git objects
2
3package gitea
4
5import (
6 "bufio"
7 "bytes"
8 "context"
9 "fmt"
10 "io"
11 "math"
12 "os/exec"
13 "strconv"
14 "strings"
15
16 "github.com/djherbis/buffer"
17 "github.com/djherbis/nio/v3"
18 "github.com/go-git/go-git/v5/plumbing"
19 "github.com/go-git/go-git/v5/plumbing/filemode"
20 "github.com/go-git/go-git/v5/plumbing/hash"
21 "github.com/go-git/go-git/v5/plumbing/object"
22)
23
24func GetCommit(ctx context.Context, repoPath, rev string) (*object.Commit, error) {
25 wr, rd, cancel := CatFileBatch(ctx, repoPath)
26 defer cancel()
27
28 if _, err := wr.Write([]byte(rev + "\n")); err != nil {
29 return nil, fmt.Errorf("write rev: %w", err)
30 }
31 sha, typ, size, err := ReadBatchLine(rd)
32 if err != nil {
33 return nil, err
34 }
35 if typ != "commit" {
36 if err := DiscardFull(rd, size+1); err != nil {
37 return nil, err
38 }
39 return nil, fmt.Errorf("unexpected type: %s for commit: %s", typ, rev)
40 }
41 commit, err := ReadCommit(plumbing.NewHash(string(sha)), io.LimitReader(rd, size))
42 if err != nil {
43 return nil, fmt.Errorf("read commit %s: %w", rev, err)
44 }
45 if _, err := rd.Discard(1); err != nil {
46 return nil, err
47 }
48 return commit, nil
49}
50
51func GetTree(ctx context.Context, repoPath, rev string) (*object.Tree, error) {
52 bw, br, cancel := CatFileBatch(ctx, repoPath)
53 defer cancel()
54
55 return BatchGetTree(bw, br, rev)
56}
57
58func BatchGetTree(bw io.WriteCloser, br *bufio.Reader, rev string) (*object.Tree, error) {
59 if _, err := bw.Write([]byte(rev + "\n")); err != nil {
60 return nil, fmt.Errorf("write rev: %w", err)
61 }
62 sha, typ, size, err := ReadBatchLine(br)
63 if err != nil {
64 return nil, fmt.Errorf("resolve %s: %w", rev, err)
65 }
66 if typ != "tree" {
67 if err := DiscardFull(br, size+1); err != nil {
68 return nil, err
69 }
70 return nil, fmt.Errorf("unexpected type: %s for tree: %s", typ, rev)
71 }
72
73 entries, err := catBatchParseTreeEntries(br, size)
74 if err != nil {
75 return nil, fmt.Errorf("read tree %s: %w", rev, err)
76 }
77 return &object.Tree{
78 Hash: plumbing.NewHash(string(sha)),
79 Entries: entries,
80 }, nil
81}
82
83func catBatchParseTreeEntries(rd *bufio.Reader, sz int64) ([]object.TreeEntry, error) {
84 entries := make([]object.TreeEntry, 0, 10)
85loop:
86 for sz > 0 {
87 mode, fname, sha, count, err := ParseCatFileTreeLine(rd)
88 if err != nil {
89 if err == io.EOF {
90 break loop
91 }
92 return nil, err
93 }
94 modeNum, err := strconv.ParseUint(string(mode), 8, 32)
95 if err != nil {
96 return nil, err
97 }
98 sz -= int64(count)
99 entry := object.TreeEntry{
100 Name: string(fname),
101 Mode: filemode.FileMode(modeNum),
102 Hash: plumbing.Hash(sha),
103 }
104 entries = append(entries, entry)
105 }
106 if _, err := rd.Discard(1); err != nil {
107 return entries, err
108 }
109 return entries, nil
110}
111
112func CatFileBatch(ctx context.Context, repoPath string) (io.WriteCloser, *bufio.Reader, func()) {
113 batchStdinReader, batchStdinWriter := io.Pipe()
114 batchStdoutReader, batchStdoutWriter := nio.Pipe(buffer.New(32 * 1024))
115 ctx, ctxCancel := context.WithCancel(ctx)
116 closed := make(chan struct{})
117 cancel := func() {
118 ctxCancel()
119 _ = batchStdinWriter.Close()
120 _ = batchStdoutReader.Close()
121 <-closed
122 }
123
124 // Ensure cancel is called as soon as the provided context is cancelled
125 go func() {
126 <-ctx.Done()
127 cancel()
128 }()
129
130 go func() {
131 stderr := &strings.Builder{}
132 cmd := exec.CommandContext(ctx, "git", "-C", repoPath, "cat-file", "--batch")
133 cmd.Stdin = batchStdinReader
134 cmd.Stdout = batchStdoutWriter
135 cmd.Stderr = stderr
136 if err := cmd.Run(); err != nil {
137 _ = batchStdinReader.CloseWithError(fmt.Errorf("%w\n%s", err, stderr.String()))
138 _ = batchStdoutWriter.CloseWithError(fmt.Errorf("%w\n%s", err, stderr.String()))
139 } else {
140 _ = batchStdoutWriter.Close()
141 _ = batchStdinReader.Close()
142 }
143 close(closed)
144 }()
145
146 batchReader := bufio.NewReaderSize(batchStdoutReader, 32*1024)
147 return batchStdinWriter, batchReader, cancel
148}
149
// ReadBatchLine reads one response header of the form
//
//	<sha> SP <type> SP <size> LF
//
// from reader and returns its three fields. A line consisting only of a
// newline is skipped once before the header is parsed.
//
// If reader is not a *bufio.Reader it is wrapped in a new one; that wrapper
// may buffer more than the header line, so callers that keep reading from the
// same stream should pass a *bufio.Reader.
//
// Errors:
//   - any read error from the underlying reader is returned as is;
//   - "missing sha: <line>" when the header contains no space at all;
//   - "missing size: <sha>" when the header has no second space (for example
//     a "<rev> missing" response);
//   - the strconv error when the size field is not a base-10 integer.
func ReadBatchLine(reader io.Reader) (sha []byte, typ string, size int64, err error) {
	rd, ok := reader.(*bufio.Reader)
	if !ok {
		rd = bufio.NewReader(reader)
	}

	typ, err = rd.ReadString('\n')
	if err != nil {
		return sha, typ, size, err
	}
	if len(typ) == 1 {
		// Only the delimiter was read: skip this empty line and read again.
		typ, err = rd.ReadString('\n')
		if err != nil {
			return sha, typ, size, err
		}
	}

	idx := strings.IndexByte(typ, ' ')
	if idx < 0 {
		// sha has not been extracted yet, so report the offending line itself.
		return sha, typ, size, fmt.Errorf("missing sha: %s", strings.TrimSuffix(typ, "\n"))
	}
	sha = []byte(typ[:idx])
	typ = typ[idx+1:]

	idx = strings.IndexByte(typ, ' ')
	if idx < 0 {
		return sha, typ, size, fmt.Errorf("missing size: %s", sha)
	}

	// typ still ends with the '\n' delimiter; exclude it from the size field.
	sizeStr := typ[idx+1 : len(typ)-1]
	typ = typ[:idx]

	size, err = strconv.ParseInt(sizeStr, 10, 64)
	return sha, typ, size, err
}
183
184// NOTE: readCommit doesn't return complete go-git [object.Commit] object!
185// The embedded object store is missing, so calling method from returned commit
186// can lead to panic.
187func ReadCommit(oid plumbing.Hash, reader io.Reader) (*object.Commit, error) {
188 commit := &object.Commit{
189 Hash: oid,
190 }
191
192 payloadSB := new(strings.Builder)
193 signatureSB := new(strings.Builder)
194 messageSB := new(strings.Builder)
195 firstLine := true
196 message := false
197 pgpsig := false
198
199 bufReader, ok := reader.(*bufio.Reader)
200 if !ok {
201 bufReader = bufio.NewReader(reader)
202 }
203
204readLoop:
205 for {
206 line, err := bufReader.ReadBytes('\n')
207 if err != nil {
208 if err == io.EOF {
209 if message {
210 _, _ = messageSB.Write(line)
211 }
212 _, _ = payloadSB.Write(line)
213 break readLoop
214 }
215 return nil, err
216 }
217 if pgpsig {
218 if len(line) > 0 && line[0] == ' ' {
219 _, _ = signatureSB.Write(line[1:])
220 continue
221 }
222 pgpsig = false
223 }
224
225 if !message {
226 // This is probably not correct but is copied from go-gits interpretation...
227 trimmed := bytes.TrimSpace(line)
228 if len(trimmed) == 0 {
229 message = true
230 _, _ = payloadSB.Write(line)
231 continue
232 }
233
234 split := bytes.SplitN(trimmed, []byte{' '}, 2)
235 var data []byte
236 if len(split) > 1 {
237 data = split[1]
238 }
239
240 switch string(split[0]) {
241 case "tree":
242 commit.TreeHash = plumbing.NewHash(string(data))
243 _, _ = payloadSB.Write(line)
244 case "parent":
245 commit.ParentHashes = append(commit.ParentHashes, plumbing.NewHash(string(data)))
246 _, _ = payloadSB.Write(line)
247 case "author":
248 commit.Author.Decode(data)
249 _, _ = payloadSB.Write(line)
250 case "committer":
251 commit.Committer.Decode(data)
252 _, _ = payloadSB.Write(line)
253 case "gpgsig":
254 fallthrough
255 case "gpgsig-sha256": // FIXME: no intertop, so only 1 exists at present.
256 _, _ = signatureSB.Write(data)
257 _ = signatureSB.WriteByte('\n')
258 pgpsig = true
259 default:
260 // If the first line is not any of the known headers, then it is probably the prefix added when git cat-file is called with --batch, and that is not part of the payload
261 if !firstLine {
262 // Every subsequent header field is added to the payload
263 _, _ = payloadSB.Write(line)
264 }
265 }
266 } else {
267 _, _ = messageSB.Write(line)
268 _, _ = payloadSB.Write(line)
269 }
270
271 firstLine = false
272 }
273 commit.Message = messageSB.String()
274 commit.PGPSignature = signatureSB.String()
275
276 return commit, nil
277}
278
279// ParseCatFileTreeLine reads an entry from a tree in a cat-file --batch stream
280// This carefully avoids allocations - except where fnameBuf is too small.
281// It is recommended therefore to pass in an fnameBuf large enough to avoid almost all allocations
282//
283// Each line is composed of:
284// <mode-in-ascii-dropping-initial-zeros> SP <fname> NUL <binary HASH>
285//
286// We don't attempt to convert the raw HASH to save a lot of time
287func ParseCatFileTreeLine(rd *bufio.Reader) (mode, fname, sha []byte, n int, err error) {
288 modeBuf := make([]byte, 40)
289 fnameBuf := make([]byte, 4096)
290 shaBuf := make([]byte, hash.HexSize)
291
292 var readBytes []byte
293
294 // Read the Mode & fname
295 readBytes, err = rd.ReadSlice('\x00')
296 if err != nil {
297 return mode, fname, sha, n, err
298 }
299 idx := bytes.IndexByte(readBytes, ' ')
300 if idx < 0 {
301 return mode, fname, sha, n, fmt.Errorf("missing")
302 }
303
304 n += idx + 1
305 copy(modeBuf, readBytes[:idx])
306 if len(modeBuf) >= idx {
307 modeBuf = modeBuf[:idx]
308 } else {
309 modeBuf = append(modeBuf, readBytes[len(modeBuf):idx]...)
310 }
311 mode = modeBuf
312
313 readBytes = readBytes[idx+1:]
314
315 // Deal with the fname
316 copy(fnameBuf, readBytes)
317 if len(fnameBuf) > len(readBytes) {
318 fnameBuf = fnameBuf[:len(readBytes)]
319 } else {
320 fnameBuf = append(fnameBuf, readBytes[len(fnameBuf):]...)
321 }
322 for err == bufio.ErrBufferFull {
323 readBytes, err = rd.ReadSlice('\x00')
324 fnameBuf = append(fnameBuf, readBytes...)
325 }
326 n += len(fnameBuf)
327 if err != nil {
328 return mode, fname, sha, n, err
329 }
330 fnameBuf = fnameBuf[:len(fnameBuf)-1]
331 fname = fnameBuf
332
333 // Deal with the binary hash
334 idx = 0
335 length := hash.HexSize / 2
336 for idx < length {
337 var read int
338 read, err = rd.Read(shaBuf[idx:length])
339 n += read
340 if err != nil {
341 return mode, fname, sha, n, err
342 }
343 idx += read
344 }
345 sha = shaBuf
346 return mode, fname, sha, n, err
347}
348
// DiscardFull skips exactly discard bytes of rd.
//
// The work is done in chunks of at most math.MaxInt32 bytes so the count
// handed to rd.Discard always fits in an int, including on platforms where
// int is 32 bits wide. A zero or negative discard is a no-op. The first error
// reported by rd.Discard (for example io.EOF when the stream is shorter than
// requested) is returned.
func DiscardFull(rd *bufio.Reader, discard int64) error {
	for discard > 0 {
		chunk := discard
		if chunk > math.MaxInt32 {
			chunk = math.MaxInt32
		}
		n, err := rd.Discard(int(chunk))
		discard -= int64(n)
		if err != nil {
			return err
		}
	}
	return nil
}