Monorepo for Tangled tangled.org
6

Configure Feed

Select the types of activity you want to include in your feed.

knotmirror: prefer `cat-file --batch` for frequent endpoints

Ideally we should use this everywhere and completely remove the go-git
dependency. go-git consumes a lot of memory for large repos because it
loads the pack index into heap memory. This is the preferred approach of
other Go-based git forges such as Gitea.

A lot of the code is copied from Gitea's implementation and slightly
modified to match Tangled's data model.

Signed-off-by: Seongmin Lee <git@boltless.me>

author
Seongmin Lee
date (May 16, 2026, 7:35 PM +0900) commit 1f5b7ce1 parent ed3b3c79 change-id oywuoyko
+1070 -127
+3 -1
appview/repo/index.go
··· 116 116 var languageInfo []types.RepoLanguageDetails 117 117 if !result.IsEmpty { 118 118 // TODO: a bit dirty 119 - languageInfo, err = rp.getLanguageInfo(r.Context(), l, f, result.Ref, ref == "") 119 + langCtx, cancel := context.WithTimeout(r.Context(), 1*time.Second) 120 + defer cancel() 121 + languageInfo, err = rp.getLanguageInfo(langCtx, l, f, result.Ref, ref == "") 120 122 if err != nil { 121 123 l.Warn("failed to compute language percentages", "err", err) 122 124 // non-fatal
+2
go.mod
··· 24 24 github.com/cyphar/filepath-securejoin v0.4.1 25 25 github.com/dgraph-io/ristretto v0.2.0 26 26 github.com/did-method-plc/go-didplc v0.2.2 27 + github.com/djherbis/buffer v1.2.0 28 + github.com/djherbis/nio/v3 v3.0.1 27 29 github.com/docker/docker v28.2.2+incompatible 28 30 github.com/dustin/go-humanize v1.0.1 29 31 github.com/gliderlabs/ssh v0.3.8
+5
go.sum
··· 193 193 github.com/did-method-plc/go-didplc v0.2.2/go.mod h1:bKdJ21irnwNHgVLWWL32zUWqZueXYbJRUcxplZghByo= 194 194 github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk= 195 195 github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= 196 + github.com/djherbis/buffer v1.1.0/go.mod h1:VwN8VdFkMY0DCALdY8o00d3IZ6Amz/UNVMWcSaJT44o= 197 + github.com/djherbis/buffer v1.2.0 h1:PH5Dd2ss0C7CRRhQCZ2u7MssF+No9ide8Ye71nPHcrQ= 198 + github.com/djherbis/buffer v1.2.0/go.mod h1:fjnebbZjCUpPinBRD+TDwXSOeNQ7fPQWLfGQqiAiUyE= 199 + github.com/djherbis/nio/v3 v3.0.1 h1:6wxhnuppteMa6RHA4L81Dq7ThkZH8SwnDzXDYy95vB4= 200 + github.com/djherbis/nio/v3 v3.0.1/go.mod h1:Ng4h80pbZFMla1yKzm61cF0tqqilXZYrogmWgZxOcmg= 196 201 github.com/dlclark/regexp2 v1.7.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= 197 202 github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZQ= 198 203 github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
+44 -21
knotmirror/xrpc/git_get_blob.go
··· 7 7 "io" 8 8 "net/http" 9 9 "path/filepath" 10 - "runtime/pprof" 11 10 "slices" 12 11 "strings" 13 12 14 13 "github.com/bluesky-social/indigo/atproto/atclient" 15 14 "github.com/bluesky-social/indigo/atproto/syntax" 16 15 "github.com/go-git/go-git/v5/plumbing/object" 17 - "tangled.org/core/knotserver/git" 16 + "tangled.org/core/knotmirror/xrpc/gitea" 18 17 ) 19 18 20 19 func (x *Xrpc) GetBlob(w http.ResponseWriter, r *http.Request) { ··· 30 29 return 31 30 } 32 31 33 - l := x.logger.With("repo", repo, "ref", ref, "path", path) 32 + l := x.logger.With("method", "git.getBlob", "repo", repo, "ref", ref, "path", path) 34 33 l.Debug("request") 35 34 36 35 if path == "" { ··· 38 37 return 39 38 } 40 39 41 - var file *object.File 42 - pprof.Do(r.Context(), pprof.Labels("repo", repo.String()), func(ctx context.Context) { 43 - file, err = x.getFile(ctx, repo, ref, path) 44 - }) 45 - if err != nil || file.Size > 1000*1000 { 40 + size, reader, err := x.getFile(r.Context(), repo, ref, path) 41 + if err != nil { 46 42 l.Warn("local mirror failed, trying proxy", "err", err) 47 43 if x.proxyToKnot(w, r, repo) { 48 44 return ··· 50 46 writeJson(w, http.StatusInternalServerError, atclient.ErrorBody{Name: "InternalServerError", Message: "failed to get blob"}) 51 47 return 52 48 } 53 - 54 - reader, err := file.Reader() 55 - if err != nil { 56 - l.Error("failed to read blob", "err", err) 57 - writeJson(w, http.StatusInternalServerError, atclient.ErrorBody{Name: "InternalServerError", Message: "failed to read the blob"}) 58 - return 59 - } 60 49 defer reader.Close() 61 50 62 51 // default to octet-stream for large blobs 63 - if file.Size > 1000*1000 { // 1MB 52 + if size > 1000*1000 { // 1MB 64 53 w.Header().Set("Content-Type", "application/octet-stream") 65 54 if _, err := io.Copy(w, reader); err != nil { 66 55 l.Error("failed to serve the blob", "err", err) ··· 111 100 w.Write(contents) 112 101 } 113 102 114 - func (x *Xrpc) getFile(ctx context.Context, repo syntax.DID, ref, path 
string) (*object.File, error) { 103 + func (x *Xrpc) getFile(ctx context.Context, repo syntax.DID, ref, path string) (int64, io.ReadCloser, error) { 115 104 repoPath, err := x.makeRepoPath(ctx, repo) 116 105 if err != nil { 117 - return nil, fmt.Errorf("resolving repo did: %w", err) 106 + return 0, nil, fmt.Errorf("resolving repo did: %w", err) 107 + } 108 + 109 + rev := ref 110 + if rev == "" { 111 + rev = "HEAD" 112 + } 113 + 114 + head, err := gitea.GetCommit(ctx, repoPath, rev) 115 + if err != nil { 116 + return 0, nil, fmt.Errorf("get head commit: %w", err) 118 117 } 119 118 120 - gr, err := git.Open(repoPath, ref) 119 + treePath := filepath.Dir(path) 120 + name := filepath.Base(path) 121 + 122 + // find subTree 123 + subRev := head.Hash.String() + "^{tree}" 124 + if treePath != "." { 125 + subRev = head.Hash.String() + ":" + treePath 126 + } 127 + subTree, err := gitea.GetTree(ctx, repoPath, subRev) 121 128 if err != nil { 122 - return nil, fmt.Errorf("opening git repo: %w", err) 129 + return 0, nil, fmt.Errorf("get subtree %s: %w", subRev, err) 123 130 } 124 131 125 - return gr.File(path) 132 + // find entry 133 + entry, err := func(subTree *object.Tree) (*object.TreeEntry, error) { 134 + for _, entry := range subTree.Entries { 135 + if entry.Name == name { 136 + return &entry, nil 137 + } 138 + } 139 + return nil, fmt.Errorf("object doesn't exist") 140 + }(subTree) 141 + if err != nil { 142 + return 0, nil, fmt.Errorf("get file: %w", err) 143 + } 144 + 145 + x.logger.Debug("ReadBlob", "name", entry.Name, "mode", entry.Mode.String(), "hash", entry.Hash.String()) 146 + 147 + // find blob 148 + return gitea.ReadBlob(ctx, repoPath, entry.Hash) 126 149 } 127 150 128 151 var textualMimeTypes = []string{
+143 -79
knotmirror/xrpc/git_get_tree.go
··· 3 3 import ( 4 4 "context" 5 5 "fmt" 6 + "io" 6 7 "net/http" 7 8 "path/filepath" 8 - "runtime/pprof" 9 9 "time" 10 - "unicode/utf8" 11 10 12 11 "github.com/bluesky-social/indigo/atproto/atclient" 13 12 "github.com/bluesky-social/indigo/atproto/syntax" 13 + 14 + "github.com/go-git/go-git/v5/plumbing" 15 + "github.com/go-git/go-git/v5/plumbing/object" 14 16 "tangled.org/core/api/tangled" 15 - "tangled.org/core/appview/pages/markup" 16 - "tangled.org/core/knotserver/git" 17 - "tangled.org/core/types" 17 + "tangled.org/core/knotmirror/xrpc/gitea" 18 + ) 19 + 20 + const ( 21 + LastCommitCache = "last_commit:%s:%s" 22 + LastCommitCacheTTL = 30 * 24 * time.Hour 18 23 ) 19 24 20 25 func (x *Xrpc) GetTree(w http.ResponseWriter, r *http.Request) { ··· 33 38 } 34 39 35 40 var out *tangled.GitTempGetTree_Output 36 - pprof.Do(r.Context(), pprof.Labels("repo", repo.String()), func(ctx context.Context) { 37 - out, err = x.getTree(ctx, repo, ref, path) 38 - }) 41 + out, err = x.getTree(r.Context(), repo, ref, path) 39 42 if err != nil { 40 43 l.Warn("local mirror failed, trying proxy", "repo", repo, "err", err) 41 44 if x.proxyToKnot(w, r, repo) { ··· 47 50 writeJson(w, http.StatusOK, out) 48 51 } 49 52 50 - func (x *Xrpc) getTree(ctx context.Context, repo syntax.DID, ref, path string) (*tangled.GitTempGetTree_Output, error) { 53 + func (x *Xrpc) getTree(ctx context.Context, repo syntax.DID, ref, treePath string) (*tangled.GitTempGetTree_Output, error) { 51 54 repoPath, err := x.makeRepoPath(ctx, repo) 52 55 if err != nil { 53 56 return nil, fmt.Errorf("failed to resolve repo did: %w", err) 54 57 } 58 + rev := ref 59 + if rev == "" { 60 + rev = "HEAD" 61 + } 55 62 56 - gr, err := git.Open(repoPath, ref) 63 + head, err := gitea.GetCommit(ctx, repoPath, rev) 57 64 if err != nil { 58 - return nil, fmt.Errorf("opening git repo: %w", err) 65 + return nil, fmt.Errorf("get head commit: %w", err) 59 66 } 60 67 61 - files, err := gr.FileTree(ctx, path) 68 + subRev := head.Hash.String() 
+ "^{tree}" 69 + if treePath != "" { 70 + subRev = head.Hash.String() + ":" + treePath 71 + } 72 + subTree, err := gitea.GetTree(ctx, repoPath, subRev) 62 73 if err != nil { 63 - return nil, fmt.Errorf("reading file tree: %w", err) 74 + return nil, fmt.Errorf("get subtree %s: %w", subRev, err) 64 75 } 65 76 66 - // if any of these files are a readme candidate, pass along its blob contents too 67 - var readmeFileName string 68 - var readmeContents string 69 - for _, file := range files { 70 - if markup.IsReadmeFile(file.Name) { 71 - contents, err := gr.RawContent(filepath.Join(path, file.Name)) 72 - if err != nil { 73 - x.logger.Error("failed to read contents of file", "path", path, "file", file.Name) 74 - } 77 + entryPaths := make([]string, len(subTree.Entries)+1) 78 + entryPaths[0] = "" 79 + for i, entry := range subTree.Entries { 80 + entryPaths[i+1] = entry.Name 81 + } 82 + 83 + commits, lastCommit, err := func(ctx context.Context, commit *object.Commit, treePath string, paths []string) (map[string]*object.Commit, *object.Commit, error) { 84 + headRef := commit.Hash.String() 85 + 86 + revs := make(map[string]string, len(paths)) 87 + var unHitPaths []string 75 88 76 - if utf8.Valid(contents) { 77 - readmeFileName = file.Name 78 - readmeContents = string(contents) 79 - break 89 + keys := make([]string, len(paths)) 90 + for i, path := range paths { 91 + keys[i] = fmt.Sprintf(LastCommitCache, headRef, filepath.Join(treePath, path)) 92 + } 93 + if cached, err := x.rdb.MGet(ctx, keys...).Result(); err == nil { 94 + for i, v := range cached { 95 + if s, ok := v.(string); ok && s != "" { 96 + revs[paths[i]] = s 97 + } else { 98 + unHitPaths = append(unHitPaths, paths[i]) 99 + } 80 100 } 101 + } else { 102 + unHitPaths = paths 81 103 } 82 - } 83 104 84 - // convert NiceTree -> tangled.RepoTempGetTree_TreeEntry 85 - treeEntries := make([]*tangled.GitTempGetTree_TreeEntry, len(files)) 86 - for i, file := range files { 87 - entry := &tangled.GitTempGetTree_TreeEntry{ 88 - 
Name: file.Name, 89 - Mode: file.Mode, 90 - Size: file.Size, 91 - } 92 - if file.LastCommit != nil { 93 - entry.Last_commit = &tangled.GitTempGetTree_LastCommit{ 94 - Hash: file.LastCommit.Hash.String(), 95 - Message: file.LastCommit.Message, 96 - When: file.LastCommit.When.Format(time.RFC3339), 105 + if len(unHitPaths) > 0 { 106 + commits, err := gitea.WalkGitLog(ctx, repoPath, headRef, treePath, unHitPaths...) 107 + if err != nil { 108 + return nil, nil, err 109 + } 110 + pipe := x.rdb.Pipeline() 111 + for path, cid := range commits { 112 + if cid == "" { 113 + continue 114 + } 115 + revs[path] = cid 116 + pipe.Set(ctx, fmt.Sprintf(LastCommitCache, headRef, filepath.Join(treePath, path)), cid, LastCommitCacheTTL) 117 + } 118 + if _, err := pipe.Exec(ctx); err != nil { 119 + x.logger.Warn("git last-commit cache write failed", "err", err) 97 120 } 98 121 } 99 - treeEntries[i] = entry 100 - } 101 122 102 - var parentPtr *string 103 - if path != "" { 104 - parentPtr = &path 105 - } 123 + // start cat-file batch 124 + batchWriter, batchReader, cancel := gitea.CatFileBatch(ctx, repoPath) 125 + defer cancel() 106 126 107 - var dotdotPtr *string 108 - if path != "" { 109 - dotdot := filepath.Dir(path) 110 - if dotdot != "." { 111 - dotdotPtr = &dotdot 127 + // path -> commit map 128 + commitsMap := map[string]*object.Commit{} 129 + for path, commitId := range revs { 130 + if commitId == headRef { 131 + commitsMap[path] = commit 132 + continue 133 + } 134 + 135 + if commitId == "" { // invalid commit? 
136 + continue 137 + } 138 + 139 + _, err := batchWriter.Write([]byte(commitId + "\n")) 140 + if err != nil { 141 + return nil, nil, err 142 + } 143 + _, typ, size, err := gitea.ReadBatchLine(batchReader) 144 + if err != nil { 145 + return nil, nil, err 146 + } 147 + if typ != "commit" { 148 + if err := gitea.DiscardFull(batchReader, size+1); err != nil { 149 + return nil, nil, err 150 + } 151 + return nil, nil, fmt.Errorf("unexpected type: %s for commit id: %s", typ, commitId) 152 + } 153 + c, err := gitea.ReadCommit(plumbing.NewHash(commitId), io.LimitReader(batchReader, size)) 154 + if _, err := batchReader.Discard(1); err != nil { 155 + return nil, nil, err 156 + } 157 + commitsMap[path] = c 112 158 } 159 + 160 + var treeCommit *object.Commit 161 + if treePath == "" { 162 + treeCommit = commit 163 + } else if c, ok := commitsMap[""]; ok { 164 + treeCommit = c 165 + } 166 + 167 + return commitsMap, treeCommit, nil 168 + }(ctx, head, treePath, entryPaths) 169 + if err != nil { 170 + return nil, err 113 171 } 114 172 115 - // find the most recent commit across all entries for the directory-level last commit 116 - var lastCommitInfo *types.LastCommitInfo 117 - for _, file := range files { 118 - if file.LastCommit == nil { 119 - continue 173 + outEntries := make([]*tangled.GitTempGetTree_TreeEntry, len(subTree.Entries)) 174 + for i, entry := range subTree.Entries { 175 + var entryLastCommit *tangled.GitTempGetTree_LastCommit 176 + if commit, ok := commits[entry.Name]; ok { 177 + entryLastCommit = &tangled.GitTempGetTree_LastCommit{ 178 + Hash: commit.Hash.String(), 179 + Message: commit.Message, 180 + When: commit.Author.When.Format(time.RFC3339), 181 + } 120 182 } 121 - if lastCommitInfo == nil { 122 - lastCommitInfo = file.LastCommit 123 - continue 183 + outEntries[i] = &tangled.GitTempGetTree_TreeEntry{ 184 + Name: entry.Name, 185 + Mode: entry.Mode.String(), 186 + Last_commit: entryLastCommit, 124 187 } 125 - if file.LastCommit.When.After(lastCommitInfo.When) { 
126 - lastCommitInfo = file.LastCommit 188 + } 189 + 190 + var parent *string 191 + var dotdot *string 192 + if treePath != "" { 193 + parent = &treePath 194 + if dir := filepath.Dir(treePath); dir != "" { 195 + dotdot = &dir 127 196 } 128 197 } 129 198 130 - var lastCommit *tangled.GitTempGetTree_LastCommit 131 - if lastCommitInfo != nil { 132 - lastCommit = &tangled.GitTempGetTree_LastCommit{ 133 - Hash: lastCommitInfo.Hash.String(), 134 - Message: lastCommitInfo.Message, 135 - When: lastCommitInfo.When.Format(time.RFC3339), 136 - } 137 - if commit, err := gr.Commit(lastCommitInfo.Hash); err == nil { 138 - lastCommit.Author = &tangled.GitTempGetTree_Signature{ 139 - Name: commit.Author.Name, 140 - Email: commit.Author.Email, 141 - } 199 + var outLastCommit *tangled.GitTempGetTree_LastCommit 200 + if lastCommit != nil { 201 + outLastCommit = &tangled.GitTempGetTree_LastCommit{ 202 + Hash: lastCommit.Hash.String(), 203 + Message: lastCommit.Message, 204 + When: lastCommit.Committer.When.Format(time.RFC3339), 142 205 } 143 206 } 144 207 145 208 return &tangled.GitTempGetTree_Output{ 146 209 Ref: ref, 147 - Parent: parentPtr, 148 - Dotdot: dotdotPtr, 149 - Files: treeEntries, 150 - LastCommit: lastCommit, 210 + Parent: parent, 211 + Dotdot: dotdot, 212 + Files: outEntries, 213 + LastCommit: outLastCommit, 214 + // TODO: remove this field entirely 151 215 Readme: &tangled.GitTempGetTree_Readme{ 152 - Filename: readmeFileName, 153 - Contents: readmeContents, 216 + Filename: "", 217 + Contents: "", 154 218 }, 155 219 }, nil 156 220 }
+10 -15
knotmirror/xrpc/git_list_languages.go
··· 6 6 "fmt" 7 7 "math" 8 8 "net/http" 9 - "runtime/pprof" 10 9 "time" 11 10 12 11 "github.com/bluesky-social/indigo/atproto/atclient" ··· 49 48 } 50 49 51 50 var out *tangled.GitTempListLanguages_Output 52 - pprof.Do(r.Context(), pprof.Labels("repo", repo.String()), func(ctx context.Context) { 53 - out, err = x.listLanguages(ctx, repo, ref) 54 - }) 51 + out, err = x.listLanguages(r.Context(), repo, ref) 55 52 if err != nil { 56 53 l.Warn("local mirror failed, trying proxy", "err", err) 57 54 if x.proxyToKnot(w, r, repo) { ··· 61 58 return 62 59 } 63 60 61 + go func() { 62 + ctx := context.Background() 63 + encoded, err := json.Marshal(out.Languages) 64 + if err != nil { 65 + return 66 + } 67 + x.rdb.Set(ctx, fmt.Sprintf(RepoLanguagesByDid, repo, ref), encoded, RepoLanguagesTTL) 68 + }() 69 + 64 70 writeJson(w, http.StatusOK, out) 65 71 } 66 72 ··· 82 88 if err != nil { 83 89 return nil, fmt.Errorf("analyzing languages: %w", err) 84 90 } 85 - 86 - langs := sizesToLanguages(sizes) 87 - 88 - go func() { 89 - ctx := context.Background() 90 - encoded, err := json.Marshal(langs) 91 - if err != nil { 92 - return 93 - } 94 - x.rdb.Set(ctx, fmt.Sprintf(RepoLanguagesByDid, repo, ref), encoded, RepoLanguagesTTL) 95 - }() 96 91 97 92 return &tangled.GitTempListLanguages_Output{ 98 93 Ref: ref,
+361
knotmirror/xrpc/gitea/batch.go
··· 1 + // NOTE: lot's of code compied from Gitea with slight modification to use go-git objects 2 + 3 + package gitea 4 + 5 + import ( 6 + "bufio" 7 + "bytes" 8 + "context" 9 + "fmt" 10 + "io" 11 + "math" 12 + "os/exec" 13 + "strconv" 14 + "strings" 15 + 16 + "github.com/djherbis/buffer" 17 + "github.com/djherbis/nio/v3" 18 + "github.com/go-git/go-git/v5/plumbing" 19 + "github.com/go-git/go-git/v5/plumbing/filemode" 20 + "github.com/go-git/go-git/v5/plumbing/hash" 21 + "github.com/go-git/go-git/v5/plumbing/object" 22 + ) 23 + 24 + func GetCommit(ctx context.Context, repoPath, rev string) (*object.Commit, error) { 25 + wr, rd, cancel := CatFileBatch(ctx, repoPath) 26 + defer cancel() 27 + 28 + if _, err := wr.Write([]byte(rev + "\n")); err != nil { 29 + return nil, fmt.Errorf("write rev: %w", err) 30 + } 31 + sha, typ, size, err := ReadBatchLine(rd) 32 + if err != nil { 33 + return nil, err 34 + } 35 + if typ != "commit" { 36 + if err := DiscardFull(rd, size+1); err != nil { 37 + return nil, err 38 + } 39 + return nil, fmt.Errorf("unexpected type: %s for commit: %s", typ, rev) 40 + } 41 + commit, err := ReadCommit(plumbing.NewHash(string(sha)), io.LimitReader(rd, size)) 42 + if err != nil { 43 + return nil, fmt.Errorf("read commit %s: %w", rev, err) 44 + } 45 + if _, err := rd.Discard(1); err != nil { 46 + return nil, err 47 + } 48 + return commit, nil 49 + } 50 + 51 + func GetTree(ctx context.Context, repoPath, rev string) (*object.Tree, error) { 52 + wr, rd, cancel := CatFileBatch(ctx, repoPath) 53 + defer cancel() 54 + 55 + if _, err := wr.Write([]byte(rev + "\n")); err != nil { 56 + return nil, fmt.Errorf("write rev: %w", err) 57 + } 58 + sha, typ, size, err := ReadBatchLine(rd) 59 + if err != nil { 60 + return nil, fmt.Errorf("resolve %s: %w", rev, err) 61 + } 62 + if typ != "tree" { 63 + if err := DiscardFull(rd, size+1); err != nil { 64 + return nil, err 65 + } 66 + return nil, fmt.Errorf("unexpected type: %s for tree: %s", typ, rev) 67 + } 68 + 69 + 
entries, err := catBatchParseTreeEntries(rd, size) 70 + if err != nil { 71 + return nil, fmt.Errorf("read tree %s: %w", rev, err) 72 + } 73 + return &object.Tree{ 74 + Hash: plumbing.NewHash(string(sha)), 75 + Entries: entries, 76 + }, nil 77 + } 78 + 79 + func catBatchParseTreeEntries(rd *bufio.Reader, sz int64) ([]object.TreeEntry, error) { 80 + entries := make([]object.TreeEntry, 0, 10) 81 + loop: 82 + for sz > 0 { 83 + mode, fname, sha, count, err := ParseCatFileTreeLine(rd) 84 + if err != nil { 85 + if err == io.EOF { 86 + break loop 87 + } 88 + return nil, err 89 + } 90 + modeNum, err := strconv.ParseUint(string(mode), 8, 32) 91 + if err != nil { 92 + return nil, err 93 + } 94 + sz -= int64(count) 95 + entry := object.TreeEntry{ 96 + Name: string(fname), 97 + Mode: filemode.FileMode(modeNum), 98 + Hash: plumbing.Hash(sha), 99 + } 100 + entries = append(entries, entry) 101 + } 102 + if _, err := rd.Discard(1); err != nil { 103 + return entries, err 104 + } 105 + return entries, nil 106 + } 107 + 108 + func CatFileBatch(ctx context.Context, repoPath string) (io.WriteCloser, *bufio.Reader, func()) { 109 + batchStdinReader, batchStdinWriter := io.Pipe() 110 + batchStdoutReader, batchStdoutWriter := nio.Pipe(buffer.New(32 * 1024)) 111 + ctx, ctxCancel := context.WithCancel(ctx) 112 + closed := make(chan struct{}) 113 + cancel := func() { 114 + ctxCancel() 115 + _ = batchStdinWriter.Close() 116 + _ = batchStdoutReader.Close() 117 + <-closed 118 + } 119 + 120 + // Ensure cancel is called as soon as the provided context is cancelled 121 + go func() { 122 + <-ctx.Done() 123 + cancel() 124 + }() 125 + 126 + go func() { 127 + stderr := &strings.Builder{} 128 + cmd := exec.CommandContext(ctx, "git", "-C", repoPath, "cat-file", "--batch") 129 + cmd.Stdin = batchStdinReader 130 + cmd.Stdout = batchStdoutWriter 131 + cmd.Stderr = stderr 132 + if err := cmd.Run(); err != nil { 133 + _ = batchStdinReader.CloseWithError(fmt.Errorf("%w\n%s", err, stderr.String())) 134 + _ = 
batchStdoutWriter.CloseWithError(fmt.Errorf("%w\n%s", err, stderr.String())) 135 + } else { 136 + _ = batchStdoutWriter.Close() 137 + _ = batchStdinReader.Close() 138 + } 139 + close(closed) 140 + }() 141 + 142 + batchReader := bufio.NewReaderSize(batchStdoutReader, 32*1024) 143 + return batchStdinWriter, batchReader, cancel 144 + } 145 + 146 + func ReadBatchLine(reader io.Reader) (sha []byte, typ string, size int64, err error) { 147 + rd, ok := reader.(*bufio.Reader) 148 + if !ok { 149 + rd = bufio.NewReader(reader) 150 + } 151 + typ, err = rd.ReadString('\n') 152 + if err != nil { 153 + return sha, typ, size, err 154 + } 155 + if len(typ) == 1 { 156 + typ, err = rd.ReadString('\n') 157 + if err != nil { 158 + return sha, typ, size, err 159 + } 160 + } 161 + idx := strings.IndexByte(typ, ' ') 162 + if idx < 0 { 163 + return sha, typ, size, fmt.Errorf("missing sha: %s", sha) 164 + } 165 + sha = []byte(typ[:idx]) 166 + typ = typ[idx+1:] 167 + 168 + idx = strings.IndexByte(typ, ' ') 169 + if idx < 0 { 170 + return sha, typ, size, fmt.Errorf("missing size: %s", sha) 171 + } 172 + 173 + sizeStr := typ[idx+1 : len(typ)-1] 174 + typ = typ[:idx] 175 + 176 + size, err = strconv.ParseInt(sizeStr, 10, 64) 177 + return sha, typ, size, err 178 + } 179 + 180 + // NOTE: readCommit doesn't return complete go-git [object.Commit] object! 181 + // The embedded object store is missing, so calling method from returned commit 182 + // can lead to panic. 
183 + func ReadCommit(oid plumbing.Hash, reader io.Reader) (*object.Commit, error) { 184 + commit := &object.Commit{ 185 + Hash: oid, 186 + } 187 + 188 + payloadSB := new(strings.Builder) 189 + signatureSB := new(strings.Builder) 190 + messageSB := new(strings.Builder) 191 + firstLine := true 192 + message := false 193 + pgpsig := false 194 + 195 + bufReader, ok := reader.(*bufio.Reader) 196 + if !ok { 197 + bufReader = bufio.NewReader(reader) 198 + } 199 + 200 + readLoop: 201 + for { 202 + line, err := bufReader.ReadBytes('\n') 203 + if err != nil { 204 + if err == io.EOF { 205 + if message { 206 + _, _ = messageSB.Write(line) 207 + } 208 + _, _ = payloadSB.Write(line) 209 + break readLoop 210 + } 211 + return nil, err 212 + } 213 + if pgpsig { 214 + if len(line) > 0 && line[0] == ' ' { 215 + _, _ = signatureSB.Write(line[1:]) 216 + continue 217 + } 218 + pgpsig = false 219 + } 220 + 221 + if !message { 222 + // This is probably not correct but is copied from go-gits interpretation... 223 + trimmed := bytes.TrimSpace(line) 224 + if len(trimmed) == 0 { 225 + message = true 226 + _, _ = payloadSB.Write(line) 227 + continue 228 + } 229 + 230 + split := bytes.SplitN(trimmed, []byte{' '}, 2) 231 + var data []byte 232 + if len(split) > 1 { 233 + data = split[1] 234 + } 235 + 236 + switch string(split[0]) { 237 + case "tree": 238 + commit.TreeHash = plumbing.NewHash(string(data)) 239 + _, _ = payloadSB.Write(line) 240 + case "parent": 241 + commit.ParentHashes = append(commit.ParentHashes, plumbing.NewHash(string(data))) 242 + _, _ = payloadSB.Write(line) 243 + case "author": 244 + commit.Author.Decode(data) 245 + _, _ = payloadSB.Write(line) 246 + case "committer": 247 + commit.Committer.Decode(data) 248 + _, _ = payloadSB.Write(line) 249 + case "gpgsig": 250 + fallthrough 251 + case "gpgsig-sha256": // FIXME: no intertop, so only 1 exists at present. 
252 + _, _ = signatureSB.Write(data) 253 + _ = signatureSB.WriteByte('\n') 254 + pgpsig = true 255 + default: 256 + // If the first line is not any of the known headers, then it is probably the prefix added when git cat-file is called with --batch, and that is not part of the payload 257 + if !firstLine { 258 + // Every subsequent header field is added to the payload 259 + _, _ = payloadSB.Write(line) 260 + } 261 + } 262 + } else { 263 + _, _ = messageSB.Write(line) 264 + _, _ = payloadSB.Write(line) 265 + } 266 + 267 + firstLine = false 268 + } 269 + commit.Message = messageSB.String() 270 + commit.PGPSignature = signatureSB.String() 271 + 272 + return commit, nil 273 + } 274 + 275 + // ParseCatFileTreeLine reads an entry from a tree in a cat-file --batch stream 276 + // This carefully avoids allocations - except where fnameBuf is too small. 277 + // It is recommended therefore to pass in an fnameBuf large enough to avoid almost all allocations 278 + // 279 + // Each line is composed of: 280 + // <mode-in-ascii-dropping-initial-zeros> SP <fname> NUL <binary HASH> 281 + // 282 + // We don't attempt to convert the raw HASH to save a lot of time 283 + func ParseCatFileTreeLine(rd *bufio.Reader) (mode, fname, sha []byte, n int, err error) { 284 + modeBuf := make([]byte, 40) 285 + fnameBuf := make([]byte, 4096) 286 + shaBuf := make([]byte, hash.HexSize) 287 + 288 + var readBytes []byte 289 + 290 + // Read the Mode & fname 291 + readBytes, err = rd.ReadSlice('\x00') 292 + if err != nil { 293 + return mode, fname, sha, n, err 294 + } 295 + idx := bytes.IndexByte(readBytes, ' ') 296 + if idx < 0 { 297 + return mode, fname, sha, n, fmt.Errorf("missing") 298 + } 299 + 300 + n += idx + 1 301 + copy(modeBuf, readBytes[:idx]) 302 + if len(modeBuf) >= idx { 303 + modeBuf = modeBuf[:idx] 304 + } else { 305 + modeBuf = append(modeBuf, readBytes[len(modeBuf):idx]...) 
306 + } 307 + mode = modeBuf 308 + 309 + readBytes = readBytes[idx+1:] 310 + 311 + // Deal with the fname 312 + copy(fnameBuf, readBytes) 313 + if len(fnameBuf) > len(readBytes) { 314 + fnameBuf = fnameBuf[:len(readBytes)] 315 + } else { 316 + fnameBuf = append(fnameBuf, readBytes[len(fnameBuf):]...) 317 + } 318 + for err == bufio.ErrBufferFull { 319 + readBytes, err = rd.ReadSlice('\x00') 320 + fnameBuf = append(fnameBuf, readBytes...) 321 + } 322 + n += len(fnameBuf) 323 + if err != nil { 324 + return mode, fname, sha, n, err 325 + } 326 + fnameBuf = fnameBuf[:len(fnameBuf)-1] 327 + fname = fnameBuf 328 + 329 + // Deal with the binary hash 330 + idx = 0 331 + length := hash.HexSize / 2 332 + for idx < length { 333 + var read int 334 + read, err = rd.Read(shaBuf[idx:length]) 335 + n += read 336 + if err != nil { 337 + return mode, fname, sha, n, err 338 + } 339 + idx += read 340 + } 341 + sha = shaBuf 342 + return mode, fname, sha, n, err 343 + } 344 + 345 + func DiscardFull(rd *bufio.Reader, discard int64) error { 346 + if discard > math.MaxInt32 { 347 + n, err := rd.Discard(math.MaxInt32) 348 + discard -= int64(n) 349 + if err != nil { 350 + return err 351 + } 352 + } 353 + for discard > 0 { 354 + n, err := rd.Discard(int(discard)) 355 + discard -= int64(n) 356 + if err != nil { 357 + return err 358 + } 359 + } 360 + return nil 361 + }
+80
knotmirror/xrpc/gitea/blob.go
··· 1 + // Copyright 2021 The Gitea Authors. All rights reserved. 2 + // SPDX-License-Identifier: MIT 3 + 4 + package gitea 5 + 6 + import ( 7 + "bufio" 8 + "bytes" 9 + "context" 10 + "io" 11 + 12 + "github.com/go-git/go-git/v5/plumbing" 13 + ) 14 + 15 + // ReadBlob returns blob size and [io.ReadCloser] of that blob. 16 + func ReadBlob(ctx context.Context, repoPath string, hash plumbing.Hash) (int64, io.ReadCloser, error) { 17 + wr, rd, cancel := CatFileBatch(ctx, repoPath) 18 + 19 + _, err := wr.Write([]byte(hash.String() + "\n")) 20 + if err != nil { 21 + cancel() 22 + return 0, nil, err 23 + } 24 + _, _, size, err := ReadBatchLine(rd) 25 + if err != nil { 26 + cancel() 27 + return 0, nil, err 28 + } 29 + 30 + if size < 4096 { 31 + bs, err := io.ReadAll(io.LimitReader(rd, size)) 32 + defer cancel() 33 + if err != nil { 34 + return 0, nil, err 35 + } 36 + _, err = rd.Discard(1) 37 + return size, io.NopCloser(bytes.NewReader(bs)), err 38 + } 39 + 40 + return size, &blobReader{ 41 + rd: rd, 42 + n: size, 43 + cancel: cancel, 44 + }, nil 45 + } 46 + 47 + type blobReader struct { 48 + rd *bufio.Reader 49 + n int64 50 + cancel func() 51 + } 52 + 53 + func (b *blobReader) Read(p []byte) (n int, err error) { 54 + if b.n <= 0 { 55 + return 0, io.EOF 56 + } 57 + if int64(len(p)) > b.n { 58 + p = p[0:b.n] 59 + } 60 + n, err = b.rd.Read(p) 61 + b.n -= int64(n) 62 + return n, err 63 + } 64 + 65 + // Close implements io.Closer 66 + func (b *blobReader) Close() error { 67 + if b.rd == nil { 68 + return nil 69 + } 70 + 71 + defer b.cancel() 72 + 73 + if err := DiscardFull(b.rd, b.n+1); err != nil { 74 + return err 75 + } 76 + 77 + b.rd = nil 78 + 79 + return nil 80 + }
+410
knotmirror/xrpc/gitea/gitea.go
··· 1 + // Copyright 2021 The Gitea Authors. All rights reserved. 2 + // SPDX-License-Identifier: MIT 3 + 4 + package gitea 5 + 6 + import ( 7 + "bufio" 8 + "bytes" 9 + "context" 10 + "errors" 11 + "fmt" 12 + "io" 13 + "os/exec" 14 + "path" 15 + "strings" 16 + 17 + "github.com/djherbis/buffer" 18 + "github.com/djherbis/nio/v3" 19 + "tangled.org/core/sets" 20 + ) 21 + 22 + // LogNameStatusRepo opens git log --raw in the provided repo and returns a stdin pipe, a stdout reader and cancel function 23 + func LogNameStatusRepo(ctx context.Context, repository, headRef, treepath string, paths ...string) (*bufio.Reader, func()) { 24 + // We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. 25 + // so let's create a batch stdin and stdout 26 + stdoutReader, stdoutWriter := nio.Pipe(buffer.New(32 * 1024)) 27 + 28 + // Lets also create a context so that we can absolutely ensure that the command should die when we're done 29 + ctx, ctxCancel := context.WithCancel(ctx) 30 + 31 + cancel := func() { 32 + ctxCancel() 33 + _ = stdoutReader.Close() 34 + _ = stdoutWriter.Close() 35 + } 36 + 37 + cmd := exec.CommandContext(ctx, 38 + "git", 39 + "log", 40 + "--name-status", 41 + "-c", 42 + "--format=commit%x00%H %P%x00", 43 + "--parents", 44 + "--no-renames", 45 + "-t", 46 + "-z", 47 + headRef, 48 + ) 49 + 50 + var files []string 51 + if len(paths) < 70 { 52 + if treepath != "" { 53 + files = append(files, treepath) 54 + for _, pth := range paths { 55 + if pth != "" { 56 + files = append(files, path.Join(treepath, pth)) 57 + } 58 + } 59 + } else { 60 + for _, pth := range paths { 61 + if pth != "" { 62 + files = append(files, pth) 63 + } 64 + } 65 + } 66 + } else if treepath != "" { 67 + files = append(files, treepath) 68 + } 69 + // Use the :(literal) pathspec magic to handle edge cases with files named like ":file.txt" or "*.jpg" 70 + for i, file := range files { 71 + files[i] = ":(literal)" + file 72 + } 73 + cmd.Args = 
append(cmd.Args, files...) 74 + 75 + go func() { 76 + stderr := &strings.Builder{} 77 + cmd.Dir = repository 78 + cmd.Stdout = stdoutWriter 79 + cmd.Stderr = stderr 80 + if err := cmd.Run(); err != nil { 81 + _ = stdoutWriter.CloseWithError(fmt.Errorf("%w\n%s", err, stderr.String())) 82 + return 83 + } 84 + 85 + _ = stdoutWriter.Close() 86 + }() 87 + 88 + // For simplicities sake we'll us a buffered reader to read from the cat-file --batch 89 + bufReader := bufio.NewReaderSize(stdoutReader, 32*1024) 90 + 91 + return bufReader, cancel 92 + } 93 + 94 + // LogNameStatusRepoParser parses a git log raw output from LogRawRepo 95 + type LogNameStatusRepoParser struct { 96 + treepath string 97 + paths []string 98 + next []byte 99 + buffull bool 100 + rd *bufio.Reader 101 + cancel func() 102 + } 103 + 104 + // NewLogNameStatusRepoParser returns a new parser for a git log raw output 105 + func NewLogNameStatusRepoParser(ctx context.Context, repository, head, treepath string, paths ...string) *LogNameStatusRepoParser { 106 + rd, cancel := LogNameStatusRepo(ctx, repository, head, treepath, paths...) 
107 + return &LogNameStatusRepoParser{ 108 + treepath: treepath, 109 + paths: paths, 110 + rd: rd, 111 + cancel: cancel, 112 + } 113 + } 114 + 115 + // LogNameStatusCommitData represents a commit artefact from git log raw 116 + type LogNameStatusCommitData struct { 117 + CommitID string 118 + ParentIDs []string 119 + Paths []bool 120 + } 121 + 122 + // Next returns the next LogStatusCommitData 123 + func (g *LogNameStatusRepoParser) Next(treepath string, paths2ids map[string]int, changed []bool, maxpathlen int) (*LogNameStatusCommitData, error) { 124 + var err error 125 + if len(g.next) == 0 { 126 + g.buffull = false 127 + g.next, err = g.rd.ReadSlice('\x00') 128 + if err != nil { 129 + switch err { 130 + case bufio.ErrBufferFull: 131 + g.buffull = true 132 + case io.EOF: 133 + return nil, nil 134 + default: 135 + return nil, err 136 + } 137 + } 138 + } 139 + 140 + ret := LogNameStatusCommitData{} 141 + if bytes.Equal(g.next, []byte("commit\000")) { 142 + g.next, err = g.rd.ReadSlice('\x00') 143 + if err != nil { 144 + switch err { 145 + case bufio.ErrBufferFull: 146 + g.buffull = true 147 + case io.EOF: 148 + return nil, nil 149 + default: 150 + return nil, err 151 + } 152 + } 153 + } 154 + 155 + // Our "line" must look like: <commitid> SP (<parent> SP) * NUL 156 + commitIDs := string(g.next) 157 + if g.buffull { 158 + more, err := g.rd.ReadString('\x00') 159 + if err != nil { 160 + return nil, err 161 + } 162 + commitIDs += more 163 + } 164 + commitIDs = commitIDs[:len(commitIDs)-1] 165 + splitIDs := strings.Split(commitIDs, " ") 166 + ret.CommitID = splitIDs[0] 167 + if len(splitIDs) > 1 { 168 + ret.ParentIDs = splitIDs[1:] 169 + } 170 + 171 + // now read the next "line" 172 + g.buffull = false 173 + g.next, err = g.rd.ReadSlice('\x00') 174 + if err != nil { 175 + if err == bufio.ErrBufferFull { 176 + g.buffull = true 177 + } else if err != io.EOF { 178 + return nil, err 179 + } 180 + } 181 + 182 + if err == io.EOF || (g.next[0] != '\n' && g.next[0] != '\000') { 
183 + return &ret, nil 184 + } 185 + 186 + // Ok we have some changes. 187 + // This line will look like: NL <fname> NUL 188 + // 189 + // Subsequent lines will not have the NL - so drop it here - g.bufffull must also be false at this point too. 190 + if g.next[0] == '\n' { 191 + g.next = g.next[1:] 192 + } else { 193 + g.buffull = false 194 + g.next, err = g.rd.ReadSlice('\x00') 195 + if err != nil { 196 + if err == bufio.ErrBufferFull { 197 + g.buffull = true 198 + } else if err != io.EOF { 199 + return nil, err 200 + } 201 + } 202 + if len(g.next) == 0 { 203 + return &ret, nil 204 + } 205 + if g.next[0] == '\x00' { 206 + g.buffull = false 207 + g.next, err = g.rd.ReadSlice('\x00') 208 + if err != nil { 209 + if err == bufio.ErrBufferFull { 210 + g.buffull = true 211 + } else if err != io.EOF { 212 + return nil, err 213 + } 214 + } 215 + } 216 + } 217 + 218 + fnameBuf := make([]byte, 4096) 219 + 220 + diffloop: 221 + for { 222 + if err == io.EOF || bytes.Equal(g.next, []byte("commit\000")) { 223 + return &ret, nil 224 + } 225 + g.next, err = g.rd.ReadSlice('\x00') 226 + if err != nil { 227 + switch err { 228 + case bufio.ErrBufferFull: 229 + g.buffull = true 230 + case io.EOF: 231 + return &ret, nil 232 + default: 233 + return nil, err 234 + } 235 + } 236 + copy(fnameBuf, g.next) 237 + if len(fnameBuf) < len(g.next) { 238 + fnameBuf = append(fnameBuf, g.next[len(fnameBuf):]...) 239 + } else { 240 + fnameBuf = fnameBuf[:len(g.next)] 241 + } 242 + if err != nil { 243 + if err != bufio.ErrBufferFull { 244 + return nil, err 245 + } 246 + more, err := g.rd.ReadBytes('\x00') 247 + if err != nil { 248 + return nil, err 249 + } 250 + fnameBuf = append(fnameBuf, more...) 
251 + } 252 + 253 + // read the next line 254 + g.buffull = false 255 + g.next, err = g.rd.ReadSlice('\x00') 256 + if err != nil { 257 + if err == bufio.ErrBufferFull { 258 + g.buffull = true 259 + } else if err != io.EOF { 260 + return nil, err 261 + } 262 + } 263 + 264 + if treepath != "" { 265 + if !bytes.HasPrefix(fnameBuf, []byte(treepath)) { 266 + fnameBuf = fnameBuf[:cap(fnameBuf)] 267 + continue diffloop 268 + } 269 + } 270 + fnameBuf = fnameBuf[len(treepath) : len(fnameBuf)-1] 271 + if len(fnameBuf) > maxpathlen { 272 + fnameBuf = fnameBuf[:cap(fnameBuf)] 273 + continue diffloop 274 + } 275 + if len(fnameBuf) > 0 { 276 + if len(treepath) > 0 { 277 + if fnameBuf[0] != '/' || bytes.IndexByte(fnameBuf[1:], '/') >= 0 { 278 + fnameBuf = fnameBuf[:cap(fnameBuf)] 279 + continue diffloop 280 + } 281 + fnameBuf = fnameBuf[1:] 282 + } else if bytes.IndexByte(fnameBuf, '/') >= 0 { 283 + fnameBuf = fnameBuf[:cap(fnameBuf)] 284 + continue diffloop 285 + } 286 + } 287 + 288 + idx, ok := paths2ids[string(fnameBuf)] 289 + if !ok { 290 + fnameBuf = fnameBuf[:cap(fnameBuf)] 291 + continue diffloop 292 + } 293 + if ret.Paths == nil { 294 + ret.Paths = changed 295 + } 296 + changed[idx] = true 297 + } 298 + } 299 + 300 + // Close closes the parser 301 + func (g *LogNameStatusRepoParser) Close() { 302 + g.cancel() 303 + } 304 + 305 + // WalkGitLog walks the git log --name-status for the head commit in the provided treepath and files 306 + func WalkGitLog(ctx context.Context, repoPath string, head, treepath string, paths ...string) (map[string]string, error) { 307 + path2idx := map[string]int{} 308 + maxpathlen := len(treepath) 309 + 310 + for i := range paths { 311 + path2idx[paths[i]] = i 312 + pthlen := len(paths[i]) + len(treepath) + 1 313 + if pthlen > maxpathlen { 314 + maxpathlen = pthlen 315 + } 316 + } 317 + 318 + g := NewLogNameStatusRepoParser(ctx, repoPath, head, treepath, paths...) 
319 + // don't use defer g.Close() here as g may change its value - instead wrap in a func 320 + defer func() { 321 + g.Close() 322 + }() 323 + 324 + results := make([]string, len(paths)) 325 + remaining := len(paths) 326 + nextRestart := min((len(paths)*3)/4, 70) 327 + lastEmptyParent := head 328 + commitSinceLastEmptyParent := uint64(0) 329 + commitSinceNextRestart := uint64(0) 330 + parentRemaining := sets.New[string]() 331 + 332 + changed := make([]bool, len(paths)) 333 + 334 + heaploop: 335 + for { 336 + select { 337 + case <-ctx.Done(): 338 + if ctx.Err() == context.DeadlineExceeded { 339 + break heaploop 340 + } 341 + g.Close() 342 + return nil, ctx.Err() 343 + default: 344 + } 345 + current, err := g.Next(treepath, path2idx, changed, maxpathlen) 346 + if err != nil { 347 + if errors.Is(err, context.DeadlineExceeded) { 348 + break heaploop 349 + } 350 + g.Close() 351 + return nil, err 352 + } 353 + if current == nil { 354 + break heaploop 355 + } 356 + parentRemaining.Remove(current.CommitID) 357 + for i, found := range current.Paths { 358 + if !found { 359 + continue 360 + } 361 + changed[i] = false 362 + if results[i] == "" { 363 + results[i] = current.CommitID 364 + delete(path2idx, paths[i]) 365 + remaining-- 366 + if results[0] == "" { 367 + results[0] = current.CommitID 368 + delete(path2idx, "") 369 + remaining-- 370 + } 371 + } 372 + } 373 + 374 + if remaining <= 0 { 375 + break heaploop 376 + } 377 + commitSinceLastEmptyParent++ 378 + if parentRemaining.Len() == 0 { 379 + lastEmptyParent = current.CommitID 380 + commitSinceLastEmptyParent = 0 381 + } 382 + if remaining <= nextRestart { 383 + commitSinceNextRestart++ 384 + if 4*commitSinceNextRestart > 3*commitSinceLastEmptyParent { 385 + g.Close() 386 + remainingPaths := make([]string, 0, len(paths)) 387 + for i, pth := range paths { 388 + if results[i] == "" { 389 + remainingPaths = append(remainingPaths, pth) 390 + } 391 + } 392 + g = NewLogNameStatusRepoParser(ctx, repoPath, lastEmptyParent, 
treepath, remainingPaths...) 393 + parentRemaining = sets.New[string]() 394 + nextRestart = (remaining * 3) / 4 395 + continue heaploop 396 + } 397 + } 398 + for _, id := range current.ParentIDs { 399 + parentRemaining.Insert(id) 400 + } 401 + } 402 + g.Close() 403 + 404 + resultsMap := map[string]string{} 405 + for i, pth := range paths { 406 + resultsMap[pth] = results[i] 407 + } 408 + 409 + return resultsMap, nil 410 + }
+6 -11
knotmirror/xrpc/repo_blob.go
··· 30 30 return 31 31 } 32 32 33 - l := x.logger.With("repo", repo, "ref", ref, "path", path) 33 + l := x.logger.With("method", "repo.blob", "repo", repo, "ref", ref, "path", path) 34 34 35 35 if path == "" { 36 36 writeJson(w, http.StatusBadRequest, atclient.ErrorBody{Name: "BadRequest", Message: "missing path parameter"}) ··· 64 64 return 65 65 } 66 66 67 - file, err := x.getFile(r.Context(), repo, ref, path) 67 + size, reader, err := x.getFile(r.Context(), repo, ref, path) 68 68 if err != nil { 69 69 l.Warn("local mirror failed, trying proxy", "err", err) 70 70 if x.proxyToKnot(w, r, repo) { ··· 73 73 writeJson(w, http.StatusInternalServerError, atclient.ErrorBody{Name: "InternalServerError", Message: "failed to get blob"}) 74 74 return 75 75 } 76 + defer reader.Close() 76 77 77 - if file.Size > 1000*1000 { // 1MB 78 + if size > 1000*1000 { // 1MB 78 79 fileTooLarge := true 79 80 writeJson(w, http.StatusOK, tangled.RepoBlob_Output{ 80 81 Ref: ref, 81 82 Path: path, 82 - Size: &file.Size, 83 + Size: &size, 83 84 FileTooLarge: &fileTooLarge, 84 85 }) 85 86 return 86 87 } 87 88 88 - reader, err := file.Reader() 89 - if err != nil { 90 - l.Error("failed to read blob", "err", err) 91 - writeJson(w, http.StatusInternalServerError, atclient.ErrorBody{Name: "InternalServerError", Message: "failed to read the blob"}) 92 - return 93 - } 94 89 contents, err := io.ReadAll(reader) 95 90 if err != nil { 96 91 l.Error("failed to read blob content", "err", err) ··· 126 121 response := tangled.RepoBlob_Output{ 127 122 Ref: ref, 128 123 Path: path, 129 - Size: &file.Size, 124 + Size: &size, 130 125 IsBinary: &isBinary, 131 126 Content: content, 132 127 }
+6
nix/gomod2nix.toml
··· 244 244 [mod."github.com/distribution/reference"] 245 245 version = "v0.6.0" 246 246 hash = "sha256-gr4tL+qz4jKyAtl8LINcxMSanztdt+pybj1T+2ulQv4=" 247 + [mod."github.com/djherbis/buffer"] 248 + version = "v1.2.0" 249 + hash = "sha256-uHJQWcXwg4j2nOBK6epCm0gkv7KwriHZ1D7dgYqipp4=" 250 + [mod."github.com/djherbis/nio/v3"] 251 + version = "v3.0.1" 252 + hash = "sha256-KZmVhlUht9phMpRtjiEMLjlyBwHKj5tnNQ2SC6pI7Rs=" 247 253 [mod."github.com/dlclark/regexp2"] 248 254 version = "v1.11.5" 249 255 hash = "sha256-jN5+2ED+YbIoPIuyJ4Ou5pqJb2w1uNKzp5yTjKY6rEQ="