Monorepo for Tangled
tangled.org
1package codesearch
2
3import (
4 "bytes"
5 "context"
6 "encoding/json"
7 "fmt"
8 "io"
9 "net/http"
10 "net/url"
11 "strings"
12 "time"
13
14 "github.com/bluesky-social/indigo/atproto/syntax"
15 "github.com/sourcegraph/zoekt"
16 "github.com/sourcegraph/zoekt/query"
17 "tangled.org/core/appview/models"
18 "tangled.org/core/appview/pagination"
19)
20
// CodeSearch talks to a zoekt-webserver over HTTP.
type CodeSearch struct {
	// Host is the base URL of the zoekt-webserver,
	// for example https://zoekt.example.com.
	Host string

	// Client is the HTTP client used for requests. It may be left nil, in
	// which case GetClient falls back to http.DefaultClient.
	Client *http.Client
}

// GetClient returns the configured Client, or http.DefaultClient when none
// has been set.
func (s *CodeSearch) GetClient() *http.Client {
	if c := s.Client; c != nil {
		return c
	}
	return http.DefaultClient
}
32
// RepoOnlyError reports that a query only filters by repo name and should be
// handled by repo search instead of content search.
type RepoOnlyError struct {
	// Query is the query string attached to the error by whoever created it.
	// It is not included in the error message.
	Query string
}

// Error implements the error interface with a fixed message.
func (*RepoOnlyError) Error() string {
	return "query only filters by repo name; use repo search instead"
}
38
39// jsonSearchArgs mirrors zoekt's /api/search request body.
40type jsonSearchArgs struct {
41 Q string
42 Opts *zoekt.SearchOptions
43}
44
45// jsonSearchReply mirrors zoekt's /api/search response body.
46type jsonSearchReply struct {
47 Result *zoekt.SearchResult
48}
49
50// jsonListArgs mirrors zoekt's /api/list request body.
51type jsonListArgs struct {
52 Q string
53 Opts *zoekt.ListOptions
54}
55
56// jsonListReply mirrors zoekt's /api/list response body.
57type jsonListReply struct {
58 List *zoekt.RepoList
59}
60
61// SearchResults is a single page of content-search results plus whether more
62// pages follow.
63type SearchResults struct {
64 Results []models.Result
65 HasMore bool
66 Stats zoekt.Stats // zoekt search stats (MatchCount, FileCount, Duration, …)
67}
68
69// Search queries zoekt server for FileNameMatch or ChunkMatch.
70// It returns *RepoOnlyError when the query only filters by repo name
71// (optionally with `lang:` filter.)
72func (s *CodeSearch) Search(ctx context.Context, queryStr string, page pagination.Page) (*SearchResults, error) {
73 q, err := query.Parse(queryStr)
74 if err != nil {
75 return nil, fmt.Errorf("parse query: %w", err)
76 }
77 if rs, ok := asRepoSearch(q); ok {
78 return nil, &RepoOnlyError{Query: rs.Query()}
79 }
80
81 opts := &zoekt.SearchOptions{
82 ChunkMatches: true,
83 MaxWallTime: 10 * time.Second,
84 NumContextLines: 2,
85 }
86 if page.Limit > 0 {
87 // +1 so we can detect a following page.
88 opts.MaxDocDisplayCount = page.Offset + page.Limit + 1
89 }
90
91 body, err := json.Marshal(jsonSearchArgs{
92 Q: queryStr,
93 Opts: opts,
94 })
95 if err != nil {
96 return nil, fmt.Errorf("marshal request: %w", err)
97 }
98
99 url := strings.TrimRight(s.Host, "/") + "/api/search"
100 req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
101 if err != nil {
102 return nil, fmt.Errorf("build request: %w", err)
103 }
104 req.Header.Set("Content-Type", "application/json")
105
106 resp, err := s.GetClient().Do(req)
107 if err != nil {
108 return nil, fmt.Errorf("do request: %w", err)
109 }
110 defer resp.Body.Close()
111
112 if resp.StatusCode != http.StatusOK {
113 b, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
114 return nil, fmt.Errorf("zoekt search: status %d: %s", resp.StatusCode, strings.TrimSpace(string(b)))
115 }
116
117 var reply jsonSearchReply
118 if err := json.NewDecoder(resp.Body).Decode(&reply); err != nil {
119 return nil, fmt.Errorf("decode response: %w", err)
120 }
121 if reply.Result == nil {
122 return &SearchResults{}, nil
123 }
124
125 stats := reply.Result.Stats
126 all := toResults(reply.Result)
127 end := page.Offset + page.Limit
128 if page.Limit <= 0 {
129 // No window requested: return everything.
130 return &SearchResults{Results: all, Stats: stats}, nil
131 }
132
133 hasMore := len(all) > end // extra card present ⇒ more pages
134 if page.Offset >= len(all) {
135 return &SearchResults{HasMore: false, Stats: stats}, nil
136 }
137 if end > len(all) {
138 end = len(all)
139 }
140 return &SearchResults{Results: all[page.Offset:end], HasMore: hasMore, Stats: stats}, nil
141}
142
143// RepoCount returns the total number of repositories in the zoekt index.
144func (s *CodeSearch) RepoCount(ctx context.Context) (int, error) {
145 body, err := json.Marshal(jsonListArgs{
146 Q: "", // empty query ⇒ match all repos
147 Opts: &zoekt.ListOptions{Field: zoekt.RepoListFieldRepos},
148 })
149 if err != nil {
150 return 0, fmt.Errorf("marshal request: %w", err)
151 }
152
153 url := strings.TrimRight(s.Host, "/") + "/api/list"
154 req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
155 if err != nil {
156 return 0, fmt.Errorf("build request: %w", err)
157 }
158 req.Header.Set("Content-Type", "application/json")
159
160 resp, err := s.GetClient().Do(req)
161 if err != nil {
162 return 0, fmt.Errorf("do request: %w", err)
163 }
164 defer resp.Body.Close()
165
166 if resp.StatusCode != http.StatusOK {
167 b, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
168 return 0, fmt.Errorf("zoekt list: status %d: %s", resp.StatusCode, strings.TrimSpace(string(b)))
169 }
170
171 var reply jsonListReply
172 if err := json.NewDecoder(resp.Body).Decode(&reply); err != nil {
173 return 0, fmt.Errorf("decode response: %w", err)
174 }
175 if reply.List == nil {
176 return 0, nil
177 }
178 return reply.List.Stats.Repos, nil
179}
180
181// toResults maps zoekt FileMatches into local Results
182func toResults(sr *zoekt.SearchResult) []models.Result {
183 var out []models.Result
184 for _, fm := range sr.Files {
185 // HACK: zoekt use int64 repo.ID as identifier, but we expect DID (string) as an repo identifier.
186 // as a quick hack without patching zoekt, we extract the DID from RepoURLs
187 repoDID := extractDID(sr.RepoURLs[fm.Repository])
188 res := models.Result{
189 RepoDID: repoDID,
190 FilePath: fm.FileName,
191 Branches: fm.Branches,
192 Commit: fm.Version,
193 Language: fm.Language,
194 }
195 for _, cm := range fm.ChunkMatches {
196 if cm.FileName {
197 res.File = &models.Result_FileMatch{Ranges: cm.Ranges}
198 break
199 } else {
200 res.Chunks = append(res.Chunks, models.Result_ChunkMatch{
201 Content: string(cm.Content),
202 ContentStartLine: int(cm.ContentStart.LineNumber),
203 Ranges: cm.Ranges,
204 })
205 }
206 }
207 out = append(out, res)
208 }
209 return out
210}
211
212// extractDID pulls the repo DID out of a zoekt FileURLTemplate of the form
213// "{appviewURL}/{repoDID}/blob/{commit}/{path}".
214func extractDID(urlTemplate string) syntax.DID {
215 if urlTemplate == "" {
216 return ""
217 }
218 u, err := url.Parse(urlTemplate)
219 if err != nil {
220 return ""
221 }
222 seg := strings.SplitN(strings.TrimPrefix(u.Path, "/"), "/", 2)[0]
223 return syntax.DID(seg)
224}
225
// repoSearchQuery is the repo-search form of a query: the repo name patterns
// plus an optional language filter.
type repoSearchQuery struct {
	// RepoNames are the repo name patterns, in the order they were collected.
	RepoNames []string

	// Language is the language filter; empty means no filter.
	Language string
}

// Query renders r as a single space-separated string: every repo name in
// order, followed by "lang:<Language>" when Language is set. RepoNames is not
// modified.
func (r repoSearchQuery) Query() string {
	terms := make([]string, 0, len(r.RepoNames)+1)
	terms = append(terms, r.RepoNames...)
	if r.Language != "" {
		terms = append(terms, "lang:"+r.Language)
	}
	return strings.Join(terms, " ")
}
238
239func asRepoSearch(q query.Q) (repoSearchQuery, bool) {
240 var rs repoSearchQuery
241 if t, ok := q.(*query.Type); ok && t.Type == query.TypeRepo {
242 query.VisitAtoms(t.Child, func(a query.Q) {
243 switch v := a.(type) {
244 case *query.Repo:
245 rs.RepoNames = append(rs.RepoNames, v.Regexp.String())
246 case *query.Substring:
247 rs.RepoNames = append(rs.RepoNames, v.Pattern)
248 case *query.Language:
249 rs.Language = v.Language
250 }
251 })
252 return rs, true
253 }
254 hasRepo, only := false, true
255 query.VisitAtoms(q, func(a query.Q) {
256 switch v := a.(type) {
257 case *query.Repo:
258 hasRepo = true
259 rs.RepoNames = append(rs.RepoNames, v.Regexp.String())
260 case *query.Language:
261 rs.Language = v.Language
262 default:
263 only = false
264 }
265 })
266 return rs, hasRepo && only
267}