Monorepo for Tangled tangled.org
2

Configure Feed

Select the types of activity you want to include in your feed.

cmd/prefill-zoekt: helper script to bulk-enqueue repos to zoekt

Signed-off-by: Seongmin Lee <git@boltless.me>

author
Seongmin Lee
date (Jun 26, 2026, 2:31 AM +0900) commit a10f89b3 parent 325f3d6a change-id nynyszum
+156
+156
cmd/prefill-zoekt/main.go
··· 1 + // prefill-zoekt bulk-enqueues zoekt index tasks 2 + // 3 + // It reads a REPOS file with one repo DID per line: 4 + // 5 + // did:plc:repository 6 + // 7 + // For each DID it resolves the knot from the DID document, then resolves the 8 + // remote HEAD branch + commit via `git ls-remote --symref`, and POSTs an 9 + // enqueue request to the indexserver's /admin/enqueueIndex endpoint. 10 + package main 11 + 12 + import ( 13 + "bytes" 14 + "context" 15 + "encoding/json" 16 + "flag" 17 + "fmt" 18 + "log" 19 + "net/http" 20 + "os" 21 + "os/exec" 22 + "strings" 23 + "sync" 24 + "sync/atomic" 25 + 26 + "github.com/bluesky-social/indigo/atproto/identity" 27 + "github.com/bluesky-social/indigo/atproto/syntax" 28 + "github.com/sourcegraph/zoekt" 29 + ) 30 + 31 + func main() { 32 + reposPath := flag.String("repos", "REPOS", "path to repos list file (one DID per line)") 33 + server := flag.String("server", "http://localhost:6060", "indexserver base url") 34 + plc := flag.String("plc", "https://plc.directory", "atproto PLC directory url") 35 + concurrency := flag.Int("concurrency", 5, "number of repos to process in parallel") 36 + flag.Parse() 37 + 38 + data, err := os.ReadFile(*reposPath) 39 + if err != nil { 40 + log.Fatalf("reading %s: %v", *reposPath, err) 41 + } 42 + 43 + ctx := context.Background() 44 + dir := identity.BaseDirectory{PLCURL: *plc} 45 + 46 + var ok, fail atomic.Int64 47 + var wg sync.WaitGroup 48 + sem := make(chan struct{}, *concurrency) 49 + 50 + for i, line := range strings.Split(string(data), "\n") { 51 + did := strings.TrimSpace(line) 52 + if did == "" { 53 + continue 54 + } 55 + 56 + wg.Add(1) 57 + sem <- struct{}{} 58 + go func(i int, did string) { 59 + defer wg.Done() 60 + defer func() { <-sem }() 61 + 62 + knot, err := resolveKnot(ctx, &dir, did) 63 + if err != nil { 64 + log.Printf("line %d: %s: resolving knot: %v", i+1, did, err) 65 + fail.Add(1) 66 + return 67 + } 68 + 69 + branch, sha, err := resolveHead(knot, did) 70 + if err != nil { 71 + log.Printf("line %d: %s: resolving HEAD: %v", i+1, did, err) 72 + fail.Add(1) 73 + return 74 + } 75 + 76 + if err := enqueue(*server, did, branch, sha); err != nil { 77 + log.Printf("line %d: %s: enqueue: %v", i+1, did, err) 78 + fail.Add(1) 79 + return 80 + } 81 + log.Printf("line %d: %s: enqueued %s@%s (knot=%s)", i+1, did, branch, sha, knot) 82 + ok.Add(1) 83 + }(i, did) 84 + } 85 + 86 + wg.Wait() 87 + fmt.Printf("done: %d enqueued, %d failed\n", ok.Load(), fail.Load()) 88 + } 89 + 90 + func resolveKnot(ctx context.Context, dir identity.Directory, did string) (string, error) { 91 + d, err := syntax.ParseDID(did) 92 + if err != nil { 93 + return "", err 94 + } 95 + ident, err := dir.LookupDID(ctx, d) 96 + if err != nil { 97 + return "", err 98 + } 99 + knot := ident.PDSEndpoint() 100 + if knot == "" { 101 + return "", fmt.Errorf("no PDS endpoint in DID document") 102 + } 103 + return knot, nil 104 + } 105 + 106 + func resolveHead(knot, did string) (branch, sha string, err error) { 107 + url := strings.TrimRight(knot, "/") + "/" + did 108 + out, err := exec.Command( 109 + "git", 110 + "-c", "http.sslVerify=false", 111 + "ls-remote", "--symref", url, "HEAD", 112 + ).Output() 113 + if err != nil { 114 + return "", "", fmt.Errorf("git ls-remote --symref %s HEAD: %w", url, err) 115 + } 116 + for line := range strings.SplitSeq(string(out), "\n") { 117 + fields := strings.Fields(line) 118 + if len(fields) < 2 { 119 + continue 120 + } 121 + switch { 122 + case fields[0] == "ref:": 123 + branch = strings.TrimPrefix(fields[1], "refs/heads/") 124 + case fields[1] == "HEAD": 125 + sha = fields[0] 126 + } 127 + } 128 + if branch == "" || sha == "" { 129 + return "", "", fmt.Errorf("could not resolve HEAD (branch=%q sha=%q)", branch, sha) 130 + } 131 + return branch, sha, nil 132 + } 133 + 134 + func enqueue(server, did, branch, sha string) error { 135 + body, err := json.Marshal(map[string]any{ 136 + "repo": did, 137 + "branches": []zoekt.RepositoryBranch{{Name: branch, Version: sha}}, 138 + }) 139 + if err != nil { 140 + return err 141 + } 142 + 143 + resp, err := http.Post(strings.TrimRight(server, "/")+"/admin/enqueueIndex", 144 + "application/json", bytes.NewReader(body)) 145 + if err != nil { 146 + return err 147 + } 148 + defer resp.Body.Close() 149 + 150 + log.Println("status", resp.StatusCode) 151 + 152 + if resp.StatusCode < 200 || resp.StatusCode >= 300 { 153 + return fmt.Errorf("status %d", resp.StatusCode) 154 + } 155 + return nil 156 + }