Monorepo for Tangled tangled.org
7

Configure Feed

Select the types of activity you want to include in your feed.

knotserver/git: improve language detection performance

- early exit on vendored dirs, dotfiles, docs, and config files
- avoid using FileContentN, directly use object.GetBlob instead
- avoid calling parent.Size() and directly access blob.Size instead

Signed-off-by: oppiliappan <me@oppi.li>

author
oppiliappan
committer
Tangled
date (May 11, 2026, 2:31 PM +0300) commit 9c46b3b9 parent 344aed13 change-id ovlspqxq
+23 -2
+23 -2
knotserver/git/language.go
···
 2  2
 3  3   import (
 4  4   	"context"
    5 + 	"io"
 5  6   	"path"
 6  7   	"strings"
 7  8
···
11 12
12 13   type LangBreakdown map[string]int64
13 14
   15 + const (
   16 + 	langContentLimit = 16 * 1024       // read up to 16 KB for language detection
   17 + 	langSizeLimit    = 1 * 1024 * 1024 // skip content read for blobs over 1 MB
   18 + )
   19 +
14 20   func (g *GitRepo) AnalyzeLanguages(ctx context.Context) (LangBreakdown, error) {
15 21   	sizes := make(map[string]int64)
16 22   	err := g.Walk(ctx, "", func(node object.TreeEntry, parent *object.Tree, root string) error {
17 23   		filepath := path.Join(root, node.Name)
18 24
19    - 		content, err := g.FileContentN(filepath, 16*1024) // 16KB
   25 + 		if enry.IsVendor(filepath) || enry.IsDocumentation(filepath) ||
   26 + 			enry.IsDotFile(filepath) || enry.IsConfiguration(filepath) {
   27 + 			return nil
   28 + 		}
   29 +
   30 + 		blob, err := object.GetBlob(g.r.Storer, node.Hash)
20 31   		if err != nil {
21 32   			return nil
22 33   		}
   34 + 		sz := blob.Size
   35 +
   36 + 		var content []byte
   37 + 		if sz <= langSizeLimit {
   38 + 			r, err := blob.Reader()
   39 + 			if err != nil {
   40 + 				return nil
   41 + 			}
   42 + 			content, _ = io.ReadAll(io.LimitReader(r, langContentLimit))
   43 + 			r.Close()
   44 + 		}
23 45
24 46   		if enry.IsGenerated(filepath, content) ||
25 47   			enry.IsBinary(content) ||
···
37 59   			return nil
38 60   		}
39 61
40    - 		sz, _ := parent.Size(node.Name)
41 62   		sizes[language] += sz
42 63
43 64   		return nil